You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by to...@apache.org on 2016/09/26 12:05:03 UTC
[1/7] incubator-joshua git commit: static analysis based code improvements on lattice package
Repository: incubator-joshua
Updated Branches:
refs/heads/master 9c6ae40ba -> 69302df5c
static analysis based code improvements on lattice package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2d3911c3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2d3911c3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2d3911c3
Branch: refs/heads/master
Commit: 2d3911c36c456ed67ddc427af0adda641f217980
Parents: 9c6ae40
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 13:44:54 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 13:44:54 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/joshua/lattice/Arc.java | 26 ++++-----
.../java/org/apache/joshua/lattice/Lattice.java | 57 ++++++++++----------
.../java/org/apache/joshua/lattice/Node.java | 26 ++++-----
3 files changed, 50 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2d3911c3/src/main/java/org/apache/joshua/lattice/Arc.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/Arc.java b/src/main/java/org/apache/joshua/lattice/Arc.java
index 5d056ab..2853c26 100644
--- a/src/main/java/org/apache/joshua/lattice/Arc.java
+++ b/src/main/java/org/apache/joshua/lattice/Arc.java
@@ -31,22 +31,22 @@ public class Arc<Label> {
/**
* Weight of this arc.
*/
- private float cost;
+ private final float cost;
/**
* Node where this arc ends.
*/
- private Node<Label> head;
+ private final Node<Label> head;
/**
* Node where this arc begins.
*/
- private Node<Label> tail;
+ private final Node<Label> tail;
/**
* Label associated with this arc.
*/
- private Label label;
+ private final Label label;
/**
* Creates an arc with the specified head, tail, cost, and label.
@@ -101,17 +101,13 @@ public class Arc<Label> {
@Override
public String toString() {
- StringBuilder s = new StringBuilder();
-
- s.append(label.toString());
- s.append(" : ");
- s.append(tail.toString());
- s.append(" ==> ");
- s.append(head.toString());
- s.append(" : ");
- s.append(cost);
-
- return s.toString();
+ return label.toString() +
+ " : " +
+ tail.toString() +
+ " ==> " +
+ head.toString() +
+ " : " +
+ cost;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2d3911c3/src/main/java/org/apache/joshua/lattice/Lattice.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/Lattice.java b/src/main/java/org/apache/joshua/lattice/Lattice.java
index c557c07..340d717 100644
--- a/src/main/java/org/apache/joshua/lattice/Lattice.java
+++ b/src/main/java/org/apache/joshua/lattice/Lattice.java
@@ -59,7 +59,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
/**
* List of all nodes in the lattice. Nodes are assumed to be in topological order.
*/
- private List<Node<Value>> nodes;
+ private final List<Node<Value>> nodes;
JoshuaConfiguration config = null;
@@ -95,16 +95,16 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
*/
public Lattice(Value[] linearChain, JoshuaConfiguration config) {
this.latticeHasAmbiguity = false;
- this.nodes = new ArrayList<Node<Value>>();
+ this.nodes = new ArrayList<>();
- Node<Value> previous = new Node<Value>(0);
+ Node<Value> previous = new Node<>(0);
nodes.add(previous);
int i = 1;
for (Value value : linearChain) {
- Node<Value> current = new Node<Value>(i);
+ Node<Value> current = new Node<>(i);
float cost = 0.0f;
// if (i > 4) cost = (float)i/1.53432f;
previous.addArc(current, cost, value);
@@ -151,11 +151,11 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
integerSentence[i] = new Token(tokens[i], config);
}
- return new Lattice<Token>(integerSentence, config);
+ return new Lattice<>(integerSentence, config);
}
public static Lattice<Token> createTokenLatticeFromPLF(String data, JoshuaConfiguration config) {
- ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>();
+ ArrayList<Node<Token>> nodes = new ArrayList<>();
// This matches a sequence of tuples, which describe arcs leaving this node
Pattern nodePattern = Pattern.compile("(.+?)\\(\\s*(\\(.+?\\),\\s*)\\s*\\)(.*)");
@@ -172,7 +172,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
boolean latticeIsAmbiguous = false;
int nodeID = 0;
- Node<Token> startNode = new Node<Token>(nodeID);
+ Node<Token> startNode = new Node<>(nodeID);
nodes.add(startNode);
while (nodeMatcher.matches()) {
@@ -182,13 +182,13 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
nodeID++;
- Node<Token> currentNode = null;
+ Node<Token> currentNode;
if (nodeID < nodes.size() && nodes.get(nodeID) != null) {
currentNode = nodes.get(nodeID);
} else {
- currentNode = new Node<Token>(nodeID);
+ currentNode = new Node<>(nodeID);
while (nodeID > nodes.size())
- nodes.add(new Node<Token>(nodes.size()));
+ nodes.add(new Node<>(nodes.size()));
nodes.add(currentNode);
}
@@ -207,9 +207,9 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
if (destinationNodeID < nodes.size() && nodes.get(destinationNodeID) != null) {
destinationNode = nodes.get(destinationNodeID);
} else {
- destinationNode = new Node<Token>(destinationNodeID);
+ destinationNode = new Node<>(destinationNodeID);
while (destinationNodeID > nodes.size())
- nodes.add(new Node<Token>(nodes.size()));
+ nodes.add(new Node<>(nodes.size()));
nodes.add(destinationNode);
}
@@ -234,11 +234,11 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
/* Add </s> as a final state, connect it to the previous end-state */
nodeID = nodes.get(nodes.size()-1).getNumber() + 1;
- Node<Token> endNode = new Node<Token>(nodeID);
+ Node<Token> endNode = new Node<>(nodeID);
nodes.get(nodes.size()-1).addArc(endNode, 0.0f, new Token(Vocabulary.STOP_SYM, config));
nodes.add(endNode);
- return new Lattice<Token>(nodes, latticeIsAmbiguous, config);
+ return new Lattice<>(nodes, latticeIsAmbiguous, config);
}
/**
@@ -250,7 +250,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
*/
public static Lattice<String> createStringLatticeFromString(String data, JoshuaConfiguration config) {
- Map<Integer, Node<String>> nodes = new HashMap<Integer, Node<String>>();
+ Map<Integer, Node<String>> nodes = new HashMap<>();
Pattern nodePattern = Pattern.compile("(.+?)\\((\\(.+?\\),)\\)(.*)");
Pattern arcPattern = Pattern.compile("\\('(.+?)',(\\d+.\\d+),(\\d+)\\),(.*)");
@@ -270,7 +270,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
if (nodes.containsKey(nodeID)) {
currentNode = nodes.get(nodeID);
} else {
- currentNode = new Node<String>(nodeID);
+ currentNode = new Node<>(nodeID);
nodes.put(nodeID, currentNode);
}
@@ -287,7 +287,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
if (nodes.containsKey(destinationNodeID)) {
destinationNode = nodes.get(destinationNodeID);
} else {
- destinationNode = new Node<String>(destinationNodeID);
+ destinationNode = new Node<>(destinationNodeID);
nodes.put(destinationNodeID, destinationNode);
}
@@ -303,12 +303,12 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
nodeMatcher = nodePattern.matcher(remainingData);
}
- List<Node<String>> nodeList = new ArrayList<Node<String>>(nodes.values());
+ List<Node<String>> nodeList = new ArrayList<>(nodes.values());
Collections.sort(nodeList, new NodeIdentifierComparator());
LOG.debug("Nodelist={}", nodeList);
- return new Lattice<String>(nodeList, config);
+ return new Lattice<>(nodeList, config);
}
/**
@@ -380,12 +380,11 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
* Note: This method assumes no backward arcs. If there are backward arcs, the returned shortest
* path costs for that node may not be accurate.
*
- * @param nodes A list of nodes which must be in topological order.
* @return The all-pairs shortest path for all pairs of nodes.
*/
private ChartSpan<Integer> calculateAllPairsShortestPath() {
- ChartSpan<Integer> distance = new ChartSpan<Integer>(nodes.size() - 1, Integer.MAX_VALUE);
+ ChartSpan<Integer> distance = new ChartSpan<>(nodes.size() - 1, Integer.MAX_VALUE);
distance.setDiagonal(0);
/* Mark reachability between immediate neighbors */
@@ -426,9 +425,9 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
public static void main(String[] args) {
- List<Node<String>> nodes = new ArrayList<Node<String>>();
+ List<Node<String>> nodes = new ArrayList<>();
for (int i = 0; i < 4; i++) {
- nodes.add(new Node<String>(i));
+ nodes.add(new Node<>(i));
}
nodes.get(0).addArc(nodes.get(1), 1.0f, "x");
@@ -437,7 +436,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
nodes.get(2).addArc(nodes.get(3), 3.0f, "b");
nodes.get(2).addArc(nodes.get(3), 5.0f, "c");
- Lattice<String> graph = new Lattice<String>(nodes, null);
+ Lattice<String> graph = new Lattice<>(nodes, null);
System.out.println("Shortest path from 0 to 3: " + graph.getShortestPath(0, 3));
}
@@ -478,7 +477,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
*/
public static Lattice<String> createFromString(String data) {
- Map<Integer,Node<String>> nodes = new HashMap<Integer,Node<String>>();
+ Map<Integer,Node<String>> nodes = new HashMap<>();
Pattern nodePattern = Pattern.compile("(.+?)\\((\\(.+?\\),)\\)(.*)");
Pattern arcPattern = Pattern.compile("\\('(.+?)',(\\d+.\\d+),(\\d+)\\),(.*)");
@@ -498,7 +497,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
if (nodes.containsKey(nodeID)) {
currentNode = nodes.get(nodeID);
} else {
- currentNode = new Node<String>(nodeID);
+ currentNode = new Node<>(nodeID);
nodes.put(nodeID, currentNode);
}
@@ -515,7 +514,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
if (nodes.containsKey(destinationNodeID)) {
destinationNode = nodes.get(destinationNodeID);
} else {
- destinationNode = new Node<String>(destinationNodeID);
+ destinationNode = new Node<>(destinationNodeID);
nodes.put(destinationNodeID, destinationNode);
}
@@ -531,11 +530,11 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
nodeMatcher = nodePattern.matcher(remainingData);
}
- List<Node<String>> nodeList = new ArrayList<Node<String>>(nodes.values());
+ List<Node<String>> nodeList = new ArrayList<>(nodes.values());
Collections.sort(nodeList, new NodeIdentifierComparator());
LOG.debug("Nodelist={}", nodeList);
- return new Lattice<String>(nodeList, new JoshuaConfiguration());
+ return new Lattice<>(nodeList, new JoshuaConfiguration());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2d3911c3/src/main/java/org/apache/joshua/lattice/Node.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/Node.java b/src/main/java/org/apache/joshua/lattice/Node.java
index ecff22e..c1fd2d0 100644
--- a/src/main/java/org/apache/joshua/lattice/Node.java
+++ b/src/main/java/org/apache/joshua/lattice/Node.java
@@ -59,7 +59,7 @@ public class Node<Label> {
*/
public Node(int id) {
this.id = id;
- this.outgoingArcs = new ArrayList<Arc<Label>>();
+ this.outgoingArcs = new ArrayList<>();
}
@@ -108,22 +108,18 @@ public class Node<Label> {
public Iterable<Node<Label>> reachableNodes() {
final Iterator<Arc<Label>> arcIterator = outgoingArcs.iterator();
- return new Iterable<Node<Label>>() {
- public Iterator<Node<Label>> iterator() {
- return new Iterator<Node<Label>>() {
+ return () -> new Iterator<Node<Label>>() {
- public boolean hasNext() {
- return arcIterator.hasNext();
- }
+ public boolean hasNext() {
+ return arcIterator.hasNext();
+ }
- public Node<Label> next() {
- return arcIterator.next().getHead();
- }
+ public Node<Label> next() {
+ return arcIterator.next().getHead();
+ }
- public void remove() {
- throw new UnsupportedOperationException();
- }
- };
+ public void remove() {
+ throw new UnsupportedOperationException();
}
};
}
@@ -138,7 +134,7 @@ public class Node<Label> {
* @param label Label of the new outgoing arc.
*/
public void addArc(Node<Label> destination, float weight, Label label) {
- outgoingArcs.add(new Arc<Label>(this, destination, weight, label));
+ outgoingArcs.add(new Arc<>(this, destination, weight, label));
}
[7/7] incubator-joshua git commit: static analysis based code improvements on subsample package
Posted by to...@apache.org.
static analysis based code improvements on subsample package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/69302df5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/69302df5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/69302df5
Branch: refs/heads/master
Commit: 69302df5cdf94aff9f27257946737f8e47f9bde4
Parents: 1d012a1
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 14:04:27 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 14:04:27 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/subsample/Alignment.java | 12 ++++++------
.../org/apache/joshua/subsample/BiCorpus.java | 16 +++++++---------
.../apache/joshua/subsample/PhraseReader.java | 2 +-
.../org/apache/joshua/subsample/Subsampler.java | 19 +++++++------------
4 files changed, 21 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/69302df5/src/main/java/org/apache/joshua/subsample/Alignment.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/Alignment.java b/src/main/java/org/apache/joshua/subsample/Alignment.java
index 073eb5c..15a87e8 100644
--- a/src/main/java/org/apache/joshua/subsample/Alignment.java
+++ b/src/main/java/org/apache/joshua/subsample/Alignment.java
@@ -28,9 +28,9 @@ package org.apache.joshua.subsample;
* @version $LastChangedDate$
*/
public class Alignment {
- private short eLength;
- private short fLength;
- private M2 aligned;
+ private final short eLength;
+ private final short fLength;
+ private final M2 aligned;
public Alignment(short fLength, short eLength, String alignments) {
this.eLength = eLength;
@@ -55,7 +55,7 @@ public class Alignment {
public String toString() {
- StringBuffer sb = new StringBuffer();
+ StringBuilder sb = new StringBuilder();
for (short i = 0; i < fLength; i++)
for (short j = 0; j < eLength; j++)
if (aligned.get(i, j)) sb.append(i).append('-').append(j).append(' ');
@@ -69,8 +69,8 @@ public class Alignment {
/** A (short,short)->boolean map for storing alignments. */
private final static class M2 {
- private short width;
- private boolean[] bits;
+ private final short width;
+ private final boolean[] bits;
public M2(short f, short e) {
width = f;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/69302df5/src/main/java/org/apache/joshua/subsample/BiCorpus.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/BiCorpus.java b/src/main/java/org/apache/joshua/subsample/BiCorpus.java
index 06ec0e9..b042391 100644
--- a/src/main/java/org/apache/joshua/subsample/BiCorpus.java
+++ b/src/main/java/org/apache/joshua/subsample/BiCorpus.java
@@ -64,7 +64,7 @@ public class BiCorpus implements Iterable<PhrasePair> {
* @throws IndexOutOfBoundsException todo
*/
public BiCorpus(String foreignFileName, String nativeFileName, String alignmentFileName)
- throws IOException, IllegalArgumentException, IndexOutOfBoundsException {
+ throws IllegalArgumentException, IndexOutOfBoundsException {
this.foreignFileName = foreignFileName;
this.nativeFileName = nativeFileName;
this.alignmentFileName = alignmentFileName;
@@ -73,9 +73,7 @@ public class BiCorpus implements Iterable<PhrasePair> {
// Of course, that will be checked for in each iteration
//
// We write it this way to avoid warnings from the foreach style loop
- Iterator<PhrasePair> it = iterator();
- while (it.hasNext()) {
- it.next();
+ for (PhrasePair phrasePair : this) {
}
}
@@ -95,9 +93,9 @@ public class BiCorpus implements Iterable<PhrasePair> {
*/
@SuppressWarnings("resource")
public Iterator<PhrasePair> iterator() {
- PhraseReader closureRF = null;
- PhraseReader closureRE = null;
- BufferedReader closureRA = null;
+ PhraseReader closureRF;
+ PhraseReader closureRE;
+ BufferedReader closureRA;
try {
closureRF = new PhraseReader(new FileReader(this.foreignFileName), (byte) 1);
closureRE = new PhraseReader(new FileReader(this.nativeFileName), (byte) 0);
@@ -134,7 +132,7 @@ public class BiCorpus implements Iterable<PhrasePair> {
if (this.hasNext()) {
Phrase f = this.nextForeignPhrase;
- Phrase e = null;
+ Phrase e;
try {
e = re.readPhrase();
} catch (IOException ioe) {
@@ -146,7 +144,7 @@ public class BiCorpus implements Iterable<PhrasePair> {
} else {
if (e.size() != 0 && f.size() != 0) {
if (null != ra) {
- String line = null;
+ String line;
try {
line = ra.readLine();
} catch (IOException ioe) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/69302df5/src/main/java/org/apache/joshua/subsample/PhraseReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/PhraseReader.java b/src/main/java/org/apache/joshua/subsample/PhraseReader.java
index 6db216f..21579c9 100644
--- a/src/main/java/org/apache/joshua/subsample/PhraseReader.java
+++ b/src/main/java/org/apache/joshua/subsample/PhraseReader.java
@@ -32,7 +32,7 @@ import org.apache.joshua.corpus.BasicPhrase;
* @version $LastChangedDate$
*/
public class PhraseReader extends BufferedReader {
- private byte language;
+ private final byte language;
public PhraseReader(Reader r, byte language) {
super(r);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/69302df5/src/main/java/org/apache/joshua/subsample/Subsampler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/Subsampler.java b/src/main/java/org/apache/joshua/subsample/Subsampler.java
index 36e1925..04662c9 100644
--- a/src/main/java/org/apache/joshua/subsample/Subsampler.java
+++ b/src/main/java/org/apache/joshua/subsample/Subsampler.java
@@ -48,9 +48,9 @@ public class Subsampler {
private static final Logger LOG = LoggerFactory.getLogger(Subsampler.class);
protected Map<Phrase, Integer> ngramCounts;
- protected int maxN;
- protected int targetCount;
- protected int maxSubsample = 1500000;
+ protected final int maxN;
+ protected final int targetCount;
+ protected final int maxSubsample = 1500000;
protected static final int MAX_SENTENCE_LENGTH = 100;
protected static final int MIN_RATIO_LENGTH = 10;
@@ -63,22 +63,19 @@ public class Subsampler {
}
private HashMap<Phrase, Integer> loadNgrams(String[] files) throws IOException {
- HashMap<Phrase, Integer> map = new HashMap<Phrase, Integer>();
+ HashMap<Phrase, Integer> map = new HashMap<>();
for (String fn : files) {
LOG.debug("Loading test set from {}", fn);
- PhraseReader reader = new PhraseReader(new FileReader(fn), (byte) 1);
Phrase phrase;
int lineCount = 0;
- try {
+ try (PhraseReader reader = new PhraseReader(new FileReader(fn), (byte) 1)) {
while ((phrase = reader.readPhrase()) != null) {
lineCount++;
List<Phrase> ngrams = phrase.getSubPhrases(this.maxN);
for (Phrase ngram : ngrams)
map.put(ngram, 0);
}
- } finally {
- reader.close();
}
LOG.debug("Processed {} lines in {}", lineCount, fn);
}
@@ -121,7 +118,7 @@ public class Subsampler {
BiCorpusFactory bcFactory) throws IOException {
try {
// Read filenames into a list
- List<String> files = new ArrayList<String>();
+ List<String> files = new ArrayList<>();
{
FileReader fr = null;
BufferedReader br = null;
@@ -148,7 +145,7 @@ public class Subsampler {
BiCorpus bc = bcFactory.fromFiles(f);
- HashMap<PhrasePair, PhrasePair> set = new HashMap<PhrasePair, PhrasePair>();
+ HashMap<PhrasePair, PhrasePair> set = new HashMap<>();
int binsize = 10; // BUG: Magic-Number
int max_k = MAX_SENTENCE_LENGTH / binsize;
@@ -181,8 +178,6 @@ public class Subsampler {
// does profiling show it helps? We only
// do it once per file, so it's not a
// performance blackhole.
- set = null;
- bc = null;
System.gc();
}
} finally {
[3/7] incubator-joshua git commit: static analysis based code improvements on mira package
Posted by to...@apache.org.
static analysis based code improvements on mira package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/0fc2f497
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/0fc2f497
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/0fc2f497
Branch: refs/heads/master
Commit: 0fc2f497e222b75eb448440af41afbd114c27e0d
Parents: 23ebc37
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 13:52:38 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 13:52:38 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/joshua/mira/MIRACore.java | 296 ++++++++++-------
.../java/org/apache/joshua/mira/Optimizer.java | 333 +++++++++----------
2 files changed, 333 insertions(+), 296 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0fc2f497/src/main/java/org/apache/joshua/mira/MIRACore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/mira/MIRACore.java b/src/main/java/org/apache/joshua/mira/MIRACore.java
index a4a6b84..9ba046d 100755
--- a/src/main/java/org/apache/joshua/mira/MIRACore.java
+++ b/src/main/java/org/apache/joshua/mira/MIRACore.java
@@ -33,6 +33,7 @@ import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -62,12 +63,11 @@ public class MIRACore {
private static final Logger LOG = LoggerFactory.getLogger(MIRACore.class);
private final JoshuaConfiguration joshuaConfiguration;
- private TreeSet<Integer>[] indicesOfInterest_all;
private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
- private final static double NegInf = (-1.0 / 0.0);
- private final static double PosInf = (+1.0 / 0.0);
+ private final static double NegInf = Double.NEGATIVE_INFINITY;
+ private final static double PosInf = Double.POSITIVE_INFINITY;
private final static double epsilon = 1.0 / 1000000;
private int verbosity; // anything of priority <= verbosity will be printed
@@ -83,7 +83,6 @@ public class MIRACore {
// number of documents in the dev set
// this should be 1, unless doing doc-level optimization
- private int[] docOfSentence;
// docOfSentence[i] stores which document contains the i'th sentence.
// docOfSentence is 0-indexed, as are the documents (i.e. first doc is indexed 0)
@@ -129,9 +128,9 @@ public class MIRACore {
/* *********************************************************** */
// private double[] lambda;
- private ArrayList<Double> lambda = new ArrayList<Double>();
+ private ArrayList<Double> lambda = new ArrayList<>();
// the current weight vector. NOTE: indexing starts at 1.
- private ArrayList<Double> bestLambda = new ArrayList<Double>();
+ private final ArrayList<Double> bestLambda = new ArrayList<>();
// the best weight vector across all iterations
private boolean[] isOptimizable;
@@ -153,7 +152,6 @@ public class MIRACore {
private Decoder myDecoder;
// COMMENT OUT if decoder is not Joshua
- private String decoderCommand;
// the command that runs the decoder; read from decoderCommandFileName
private int decVerbosity;
@@ -163,7 +161,6 @@ public class MIRACore {
private int validDecoderExitValue;
// return value from running the decoder command that indicates success
- private int numOptThreads;
// number of threads to run things in parallel
private int saveInterFiles;
@@ -244,9 +241,9 @@ public class MIRACore {
private double prevMetricScore = 0; // final metric score of the previous iteration, used only
// when returnBest = true
- private String dirPrefix; // where are all these files located?
private String paramsFileName, docInfoFileName, finalLambdaFileName;
- private String sourceFileName, refFileName, decoderOutFileName;
+ private String refFileName;
+ private String decoderOutFileName;
private String decoderConfigFileName, decoderCommandFileName;
private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
@@ -260,21 +257,21 @@ public class MIRACore {
this.joshuaConfiguration = joshuaConfiguration;
}
- public MIRACore(String[] args, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
+ public MIRACore(String[] args, JoshuaConfiguration joshuaConfiguration) throws IOException {
this.joshuaConfiguration = joshuaConfiguration;
EvaluationMetric.set_knownMetrics();
processArgsArray(args);
initialize(0);
}
- public MIRACore(String configFileName, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
+ public MIRACore(String configFileName, JoshuaConfiguration joshuaConfiguration) throws IOException {
this.joshuaConfiguration = joshuaConfiguration;
EvaluationMetric.set_knownMetrics();
processArgsArray(cfgFileToArgsArray(configFileName));
initialize(0);
}
- private void initialize(int randsToSkip) throws FileNotFoundException, IOException {
+ private void initialize(int randsToSkip) throws IOException {
println("NegInf: " + NegInf + ", PosInf: " + PosInf + ", epsilon: " + epsilon, 4);
randGen = new Random(seed);
@@ -336,8 +333,8 @@ public class MIRACore {
// and one line for the normalization method
// indexing starts at 1 in these arrays
for (int p = 0; p <= numParams; ++p)
- lambda.add(new Double(0));
- bestLambda.add(new Double(0));
+ lambda.add(0d);
+ bestLambda.add(0d);
// why only lambda is a list? because the size of lambda
// may increase over time, but other arrays are specified in
// the param config file, only used for initialization
@@ -355,6 +352,7 @@ public class MIRACore {
String[][] refSentences = new String[numSentences][refsPerSen];
+ String decoderCommand;
try {
// read in reference sentences
@@ -489,10 +487,10 @@ public class MIRACore {
@SuppressWarnings("unchecked")
TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
- indicesOfInterest_all = temp_TSA;
+ TreeSet<Integer>[] indicesOfInterest_all = temp_TSA;
for (int i = 0; i < numSentences; ++i) {
- indicesOfInterest_all[i] = new TreeSet<Integer>();
+ indicesOfInterest_all[i] = new TreeSet<>();
}
} // void initialize(...)
@@ -517,9 +515,9 @@ public class MIRACore {
if (folder.exists()) {
File[] listOfFiles = folder.listFiles();
- for (int i = 0; i < listOfFiles.length; i++) {
- if (listOfFiles[i].isFile()) {
- files = listOfFiles[i].getName();
+ for (File listOfFile : listOfFiles) {
+ if (listOfFile.isFile()) {
+ files = listOfFile.getName();
if (files.startsWith("MIRA.temp")) {
deleteFile(files);
}
@@ -622,11 +620,11 @@ public class MIRACore {
// save feats and stats for all candidates(old & new)
HashMap<String, String>[] feat_hash = new HashMap[numSentences];
for (int i = 0; i < numSentences; i++)
- feat_hash[i] = new HashMap<String, String>();
+ feat_hash[i] = new HashMap<>();
HashMap<String, String>[] stats_hash = new HashMap[numSentences];
for (int i = 0; i < numSentences; i++)
- stats_hash[i] = new HashMap<String, String>();
+ stats_hash[i] = new HashMap<>();
while (!done) { // NOTE: this "loop" will only be carried out once
println("--- Starting MIRA iteration #" + iteration + " @ " + (new Date()) + " ---", 1);
@@ -714,7 +712,7 @@ public class MIRACore {
candCount[i] = 0;
lastUsedIndex[i] = -1;
// suffStats_array[i].clear();
- suffStats_array[i] = new ConcurrentHashMap<Integer, int[]>();
+ suffStats_array[i] = new ConcurrentHashMap<>();
}
// initLambda[0] is not used!
@@ -843,7 +841,7 @@ public class MIRACore {
// (It's not actually a bug, but only because existingCandStats gets
// cleared before moving to the next source sentence.)
// FIX: should be made an array, indexed by i
- HashMap<String, String> existingCandStats = new HashMap<String, String>();
+ HashMap<String, String> existingCandStats = new HashMap<>();
// VERY IMPORTANT:
// A CANDIDATE X MAY APPEARED IN ITER 1, ITER 3
// BUT IF THE USER SPECIFIED TO CONSIDER ITERATIONS FROM ONLY ITER 2, THEN
@@ -930,7 +928,7 @@ public class MIRACore {
String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest + 1];
- Vector<String> unknownCands_V = new Vector<String>();
+ Vector<String> unknownCands_V = new Vector<>();
// which candidates (of the i'th source sentence) have not been seen before
// this iteration?
@@ -1108,7 +1106,7 @@ public class MIRACore {
// initialized as zero anyway
if (featId > numParams) {
++numParams;
- lambda.add(new Double(0));
+ lambda.add(0d);
}
}
}
@@ -1221,7 +1219,7 @@ public class MIRACore {
lambda.set(p, bestLambda.get(p));
// and set the rest of lambda to be 0
for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
- lambda.set(p + bestLambda.size(), new Double(0));
+ lambda.set(p + bestLambda.size(), 0d);
}
return null; // this means that the old values should be kept by the caller
@@ -1268,7 +1266,7 @@ public class MIRACore {
}
}
- Vector<String> output = new Vector<String>();
+ Vector<String> output = new Vector<>();
// note: initialLambda[] has length = numParamsOld
// augmented with new feature weights, initial values are 0
@@ -1312,8 +1310,8 @@ public class MIRACore {
/************* end optimization **************/
- for (int i = 0; i < output.size(); i++)
- println(output.get(i));
+ for (String anOutput : output)
+ println(anOutput);
// check if any parameter has been updated
boolean anyParamChanged = false;
@@ -1391,7 +1389,7 @@ public class MIRACore {
// (interpolation with previous wt vector)
double interCoef = 1.0; // no interpolation for now
for (int i = 1; i <= numParams; i++)
- lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
+ lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i));
println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
println("", 1);
@@ -1425,9 +1423,9 @@ public class MIRACore {
retStr += "(listing the first " + featToPrint + " lambdas)";
for (int c = 1; c <= featToPrint - 1; ++c) {
- retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
+ retStr += "" + String.format("%.4f", lambdaA.get(c)) + ", ";
}
- retStr += "" + String.format("%.4f", lambdaA.get(numParams).doubleValue()) + "}";
+ retStr += "" + String.format("%.4f", lambdaA.get(numParams)) + "}";
return retStr;
}
@@ -1460,7 +1458,7 @@ public class MIRACore {
println("Running external decoder...", 1);
try {
- ArrayList<String> cmd = new ArrayList<String>();
+ ArrayList<String> cmd = new ArrayList<>();
cmd.add(decoderCommandFileName);
if (passIterationToDecoder)
@@ -1609,7 +1607,7 @@ public class MIRACore {
if (c_match == -1) {
outFile.println(line);
} else {
- if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
+ if (Math.abs(params.get(c_match)) > 1e-20)
outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
}
@@ -1618,7 +1616,7 @@ public class MIRACore {
// now append weights of new features
for (int c = origFeatNum + 1; c <= numParams; ++c) {
- if (Math.abs(params.get(c).doubleValue()) > 1e-20)
+ if (Math.abs(params.get(c)) > 1e-20)
outFile.println(Vocabulary.word(c) + " " + params.get(c));
}
@@ -1649,16 +1647,20 @@ public class MIRACore {
// read default value
lambda.set(c, inFile_init.nextDouble());
- defaultLambda[c] = lambda.get(c).doubleValue();
+ defaultLambda[c] = lambda.get(c);
// read isOptimizable
dummy = inFile_init.next();
- if (dummy.equals("Opt")) {
+ switch (dummy) {
+ case "Opt":
isOptimizable[c] = true;
- } else if (dummy.equals("Fix")) {
+ break;
+ case "Fix":
isOptimizable[c] = false;
- } else {
- throw new RuntimeException("Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
+ break;
+ default:
+ throw new RuntimeException(
+ "Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
}
if (!isOptimizable[c]) { // skip next two values
@@ -1727,9 +1729,11 @@ public class MIRACore {
dummy = (origLine.substring(origLine.indexOf("=") + 1)).trim();
String[] dummyA = dummy.split("\\s+");
- if (dummyA[0].equals("none")) {
+ switch (dummyA[0]) {
+ case "none":
normalizationOptions[0] = 0;
- } else if (dummyA[0].equals("absval")) {
+ break;
+ case "absval":
normalizationOptions[0] = 1;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
String pName = dummyA[2];
@@ -1745,37 +1749,43 @@ public class MIRACore {
throw new RuntimeException("Unrecognized feature name " + normalizationOptions[2]
+ " for absval normalization method.");
}
- } else if (dummyA[0].equals("maxabsval")) {
+ break;
+ case "maxabsval":
normalizationOptions[0] = 2;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
if (normalizationOptions[1] <= 0) {
- throw new RuntimeException("Value for the maxabsval normalization method must be positive.");
+ throw new RuntimeException(
+ "Value for the maxabsval normalization method must be positive.");
}
- } else if (dummyA[0].equals("minabsval")) {
+ break;
+ case "minabsval":
normalizationOptions[0] = 3;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
if (normalizationOptions[1] <= 0) {
- throw new RuntimeException("Value for the minabsval normalization method must be positive.");
+ throw new RuntimeException(
+ "Value for the minabsval normalization method must be positive.");
}
- } else if (dummyA[0].equals("LNorm")) {
+ break;
+ case "LNorm":
normalizationOptions[0] = 4;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
normalizationOptions[2] = Double.parseDouble(dummyA[2]);
if (normalizationOptions[1] <= 0 || normalizationOptions[2] <= 0) {
- throw new RuntimeException("Both values for the LNorm normalization method must be"
- + " positive.");
+ throw new RuntimeException(
+ "Both values for the LNorm normalization method must be" + " positive.");
}
- } else {
+ break;
+ default:
throw new RuntimeException("Unrecognized normalization method " + dummyA[0] + "; "
+ "must be one of none, absval, maxabsval, and LNorm.");
- } // if (dummyA[0])
+ }
inFile_init.close();
} // processParamFile()
private void processDocInfo() {
// sets numDocuments and docOfSentence[]
- docOfSentence = new int[numSentences];
+ int[] docOfSentence = new int[numSentences];
if (docInfoFileName == null) {
for (int i = 0; i < numSentences; ++i)
@@ -1832,7 +1842,7 @@ public class MIRACore {
boolean format3 = false;
- HashSet<String> seenStrings = new HashSet<String>();
+ HashSet<String> seenStrings = new HashSet<>();
BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
for (int i = 0; i < numSentences; ++i) {
// set format3 = true if a duplicate is found
@@ -1844,8 +1854,8 @@ public class MIRACore {
inFile.close();
- HashSet<String> seenDocNames = new HashSet<String>();
- HashMap<String, Integer> docOrder = new HashMap<String, Integer>();
+ HashSet<String> seenDocNames = new HashSet<>();
+ HashMap<String, Integer> docOrder = new HashMap<>();
// maps a document name to the order (0-indexed) in which it was seen
inFile = new BufferedReader(new FileReader(docInfoFileName));
@@ -1972,7 +1982,7 @@ public class MIRACore {
try {
PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
for (int c = 1; c <= numParams; ++c) {
- outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
+ outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c));
}
outFile_lambdas.close();
@@ -1986,7 +1996,7 @@ public class MIRACore {
private String[] cfgFileToArgsArray(String fileName) {
checkFile(fileName);
- Vector<String> argsVector = new Vector<String>();
+ Vector<String> argsVector = new Vector<>();
BufferedReader inFile = null;
try {
@@ -1998,7 +2008,7 @@ public class MIRACore {
if (line != null && line.length() > 0 && line.charAt(0) != '#') {
- if (line.indexOf("#") != -1) { // discard comment
+ if (line.contains("#")) { // discard comment
line = line.substring(0, line.indexOf("#"));
}
@@ -2021,7 +2031,7 @@ public class MIRACore {
// cmu modification(from meteor for zmert)
// Parse args
- ArrayList<String> argList = new ArrayList<String>();
+ ArrayList<String> argList = new ArrayList<>();
StringBuilder arg = new StringBuilder();
boolean quoted = false;
for (int i = 0; i < line.length(); i++) {
@@ -2054,9 +2064,7 @@ public class MIRACore {
argsVector.add(paramA[1]);
} else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
// -m (metricName), -docSet are allowed to have extra optinos
- for (int opt = 0; opt < paramA.length; ++opt) {
- argsVector.add(paramA[opt]);
- }
+ Collections.addAll(argsVector, paramA);
} else {
throw new RuntimeException("Malformed line in config file:" + origLine);
}
@@ -2085,8 +2093,8 @@ public class MIRACore {
private void processArgsArray(String[] args, boolean firstTime) {
/* set default values */
// Relevant files
- dirPrefix = null;
- sourceFileName = null;
+ String dirPrefix = null;
+ String sourceFileName = null;
refFileName = "reference.txt";
refsPerSen = 1;
textNormMethod = 1;
@@ -2109,7 +2117,7 @@ public class MIRACore {
//
// /* possibly other early stopping criteria here */
//
- numOptThreads = 1;
+ int numOptThreads = 1;
saveInterFiles = 3;
compressFiles = 0;
oneModificationPerIteration = false;
@@ -2135,30 +2143,39 @@ public class MIRACore {
while (i < args.length) {
String option = args[i];
// Relevant files
- if (option.equals("-dir")) {
+ switch (option) {
+ case "-dir":
dirPrefix = args[i + 1];
- } else if (option.equals("-s")) {
+ break;
+ case "-s":
sourceFileName = args[i + 1];
- } else if (option.equals("-r")) {
+ break;
+ case "-r":
refFileName = args[i + 1];
- } else if (option.equals("-rps")) {
+ break;
+ case "-rps":
refsPerSen = Integer.parseInt(args[i + 1]);
if (refsPerSen < 1) {
throw new RuntimeException("refsPerSen must be positive.");
}
- } else if (option.equals("-txtNrm")) {
+ break;
+ case "-txtNrm":
textNormMethod = Integer.parseInt(args[i + 1]);
if (textNormMethod < 0 || textNormMethod > 4) {
throw new RuntimeException("textNormMethod should be between 0 and 4");
}
- } else if (option.equals("-p")) {
+ break;
+ case "-p":
paramsFileName = args[i + 1];
- } else if (option.equals("-docInfo")) {
+ break;
+ case "-docInfo":
docInfoFileName = args[i + 1];
- } else if (option.equals("-fin")) {
+ break;
+ case "-fin":
finalLambdaFileName = args[i + 1];
// MERT specs
- } else if (option.equals("-m")) {
+ break;
+ case "-m":
metricName = args[i + 1];
metricName_display = metricName;
if (EvaluationMetric.knownMetricName(metricName)) {
@@ -2171,7 +2188,8 @@ public class MIRACore {
} else {
throw new RuntimeException("Unknown metric name " + metricName + ".");
}
- } else if (option.equals("-docSet")) {
+ break;
+ case "-docSet":
String method = args[i + 1];
if (method.equals("all")) {
@@ -2216,48 +2234,56 @@ public class MIRACore {
} else {
throw new RuntimeException("Unknown docSet method " + method + ".");
}
- } else if (option.equals("-maxIt")) {
+ break;
+ case "-maxIt":
maxMERTIterations = Integer.parseInt(args[i + 1]);
if (maxMERTIterations < 1) {
throw new RuntimeException("maxIt must be positive.");
}
- } else if (option.equals("-minIt")) {
+ break;
+ case "-minIt":
minMERTIterations = Integer.parseInt(args[i + 1]);
if (minMERTIterations < 1) {
throw new RuntimeException("minIt must be positive.");
}
- } else if (option.equals("-prevIt")) {
+ break;
+ case "-prevIt":
prevMERTIterations = Integer.parseInt(args[i + 1]);
if (prevMERTIterations < 0) {
throw new RuntimeException("prevIt must be non-negative.");
}
- } else if (option.equals("-stopIt")) {
+ break;
+ case "-stopIt":
stopMinIts = Integer.parseInt(args[i + 1]);
if (stopMinIts < 1) {
throw new RuntimeException("stopIts must be positive.");
}
- } else if (option.equals("-stopSig")) {
+ break;
+ case "-stopSig":
stopSigValue = Double.parseDouble(args[i + 1]);
- }
+ break;
//
// /* possibly other early stopping criteria here */
//
- else if (option.equals("-thrCnt")) {
+ case "-thrCnt":
numOptThreads = Integer.parseInt(args[i + 1]);
if (numOptThreads < 1) {
throw new RuntimeException("threadCount must be positive.");
}
- } else if (option.equals("-save")) {
+ break;
+ case "-save":
saveInterFiles = Integer.parseInt(args[i + 1]);
if (saveInterFiles < 0 || saveInterFiles > 3) {
throw new RuntimeException("save should be between 0 and 3");
}
- } else if (option.equals("-compress")) {
+ break;
+ case "-compress":
compressFiles = Integer.parseInt(args[i + 1]);
if (compressFiles < 0 || compressFiles > 1) {
throw new RuntimeException("compressFiles should be either 0 or 1");
}
- } else if (option.equals("-opi")) {
+ break;
+ case "-opi":
int opi = Integer.parseInt(args[i + 1]);
if (opi == 1) {
oneModificationPerIteration = true;
@@ -2266,7 +2292,8 @@ public class MIRACore {
} else {
throw new RuntimeException("oncePerIt must be either 0 or 1.");
}
- } else if (option.equals("-rand")) {
+ break;
+ case "-rand":
int rand = Integer.parseInt(args[i + 1]);
if (rand == 1) {
randInit = true;
@@ -2275,20 +2302,21 @@ public class MIRACore {
} else {
throw new RuntimeException("randInit must be either 0 or 1.");
}
- } else if (option.equals("-seed")) {
+ break;
+ case "-seed":
if (args[i + 1].equals("time")) {
seed = System.currentTimeMillis();
} else {
seed = Long.parseLong(args[i + 1]);
}
- }
+ break;
/*
* else if (option.equals("-ud")) { useDisk = Integer.parseInt(args[i+1]); if (useDisk < 0 ||
* useDisk > 2) { println("useDisk should be between 0 and 2"); System.exit(10); } }
*/
// for mira:
- else if (option.equals("-needShuffle")) {
+ case "-needShuffle":
int shuffle = Integer.parseInt(args[i + 1]);
if (shuffle == 1)
needShuffle = true;
@@ -2297,9 +2325,9 @@ public class MIRACore {
else {
throw new RuntimeException("-needShuffle must be either 0 or 1.");
}
- }
+ break;
// average weights after each epoch or not
- else if (option.equals("-needAvg")) {
+ case "-needAvg":
int avg = Integer.parseInt(args[i + 1]);
if (avg == 1)
needAvg = true;
@@ -2308,9 +2336,9 @@ public class MIRACore {
else {
throw new RuntimeException("-needAvg must be either 0 or 1.");
}
- }
+ break;
// return the best weight during tuning or not
- else if (option.equals("-returnBest")) {
+ case "-returnBest":
int retBest = Integer.parseInt(args[i + 1]);
if (retBest == 1)
returnBest = true;
@@ -2319,9 +2347,9 @@ public class MIRACore {
else {
throw new RuntimeException("-returnBest must be either 0 or 1.");
}
- }
+ break;
// run perceptron or not
- else if (option.equals("-runPercep")) {
+ case "-runPercep":
int per = Integer.parseInt(args[i + 1]);
if (per == 1)
runPercep = true;
@@ -2330,27 +2358,27 @@ public class MIRACore {
else {
throw new RuntimeException("-runPercep must be either 0 or 1.");
}
- }
+ break;
// oracle selection mode
- else if (option.equals("-oracleSelection")) {
+ case "-oracleSelection":
oraSelectMode = Integer.parseInt(args[i + 1]);
- }
+ break;
// prediction selection mode
- else if (option.equals("-predictionSelection")) {
+ case "-predictionSelection":
predSelectMode = Integer.parseInt(args[i + 1]);
- }
+ break;
// MIRA internal iterations
- else if (option.equals("-miraIter")) {
+ case "-miraIter":
miraIter = Integer.parseInt(args[i + 1]);
- }
+ break;
// mini-batch size
- else if (option.equals("-batchSize")) {
+ case "-batchSize":
batchSize = Integer.parseInt(args[i + 1]);
- }
+ break;
// relaxation coefficient
- else if (option.equals("-C")) {
+ case "-C":
C = Double.parseDouble(args[i + 1]);
- }
+ break;
// else if (option.equals("-sentForScaling")) {
// sentForScale = Double.parseDouble(args[i + 1]);
// if(sentForScale>1 || sentForScale<0) {
@@ -2358,12 +2386,13 @@ public class MIRACore {
// System.exit(10);
// }
// }
- else if (option.equals("-scoreRatio")) {
+ case "-scoreRatio":
scoreRatio = Double.parseDouble(args[i + 1]);
if (scoreRatio <= 0) {
throw new RuntimeException("-scoreRatio must be positive");
}
- } else if (option.equals("-needScaling")) {
+ break;
+ case "-needScaling":
int scale = Integer.parseInt(args[i + 1]);
if (scale == 1)
needScale = true;
@@ -2372,7 +2401,8 @@ public class MIRACore {
else {
throw new RuntimeException("-needScaling must be either 0 or 1.");
}
- } else if (option.equals("-usePseudoCorpus")) {
+ break;
+ case "-usePseudoCorpus":
int use = Integer.parseInt(args[i + 1]);
if (use == 1)
usePseudoBleu = true;
@@ -2381,51 +2411,61 @@ public class MIRACore {
else {
throw new RuntimeException("-usePseudoCorpus must be either 0 or 1.");
}
- } else if (option.equals("-corpusDecay")) {
+ break;
+ case "-corpusDecay":
R = Double.parseDouble(args[i + 1]);
- }
+ break;
// Decoder specs
- else if (option.equals("-cmd")) {
+ case "-cmd":
decoderCommandFileName = args[i + 1];
- } else if (option.equals("-passIt")) {
+ break;
+ case "-passIt":
int val = Integer.parseInt(args[i + 1]);
if (val < 0 || val > 1) {
throw new RuntimeException("passIterationToDecoder should be either 0 or 1");
}
- passIterationToDecoder = (val == 1) ? true : false;
- } else if (option.equals("-decOut")) {
+ passIterationToDecoder = (val == 1);
+ break;
+ case "-decOut":
decoderOutFileName = args[i + 1];
- } else if (option.equals("-decExit")) {
+ break;
+ case "-decExit":
validDecoderExitValue = Integer.parseInt(args[i + 1]);
- } else if (option.equals("-dcfg")) {
+ break;
+ case "-dcfg":
decoderConfigFileName = args[i + 1];
- } else if (option.equals("-N")) {
+ break;
+ case "-N":
sizeOfNBest = Integer.parseInt(args[i + 1]);
if (sizeOfNBest < 1) {
throw new RuntimeException("N must be positive.");
}
- }
+ break;
// Output specs
- else if (option.equals("-v")) {
+ case "-v":
verbosity = Integer.parseInt(args[i + 1]);
if (verbosity < 0 || verbosity > 4) {
throw new RuntimeException("verbosity should be between 0 and 4");
}
- } else if (option.equals("-decV")) {
+ break;
+ case "-decV":
decVerbosity = Integer.parseInt(args[i + 1]);
if (decVerbosity < 0 || decVerbosity > 1) {
throw new RuntimeException("decVerbosity should be either 0 or 1");
}
- } else if (option.equals("-fake")) {
+ break;
+ case "-fake":
fakeFileNameTemplate = args[i + 1];
int QM_i = fakeFileNameTemplate.indexOf("?");
if (QM_i <= 0) {
- throw new RuntimeException("fakeFileNameTemplate must contain '?' to indicate position of iteration number");
+ throw new RuntimeException(
+ "fakeFileNameTemplate must contain '?' to indicate position of iteration number");
}
fakeFileNamePrefix = fakeFileNameTemplate.substring(0, QM_i);
fakeFileNameSuffix = fakeFileNameTemplate.substring(QM_i + 1);
- } else {
+ break;
+ default:
throw new RuntimeException("Unknown option " + option);
}
@@ -2785,7 +2825,7 @@ public class MIRACore {
str = " " + str + " ";
str = str.replaceAll("\\s+", " ");
- TreeSet<Integer> splitIndices = new TreeSet<Integer>();
+ TreeSet<Integer> splitIndices = new TreeSet<>();
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
@@ -2832,7 +2872,7 @@ public class MIRACore {
// remove spaces around dashes
if (normMethod == 2 || normMethod == 4) {
- TreeSet<Integer> skipIndices = new TreeSet<Integer>();
+ TreeSet<Integer> skipIndices = new TreeSet<>();
str = " " + str + " ";
for (int i = 0; i < str.length(); ++i) {
@@ -2903,7 +2943,7 @@ public class MIRACore {
}
private ArrayList<Double> randomLambda() {
- ArrayList<Double> retLambda = new ArrayList<Double>(1 + numParams);
+ ArrayList<Double> retLambda = new ArrayList<>(1 + numParams);
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0fc2f497/src/main/java/org/apache/joshua/mira/Optimizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/mira/Optimizer.java b/src/main/java/org/apache/joshua/mira/Optimizer.java
index f51a5b3..6592a5d 100755
--- a/src/main/java/org/apache/joshua/mira/Optimizer.java
+++ b/src/main/java/org/apache/joshua/mira/Optimizer.java
@@ -42,13 +42,12 @@ public class Optimizer {
feat_hash = _feat_hash; // feature hash table
stats_hash = _stats_hash; // suff. stats hash table
finalLambda = new double[initialLambda.length];
- for (int i = 0; i < finalLambda.length; i++)
- finalLambda[i] = initialLambda[i];
+ System.arraycopy(initialLambda, 0, finalLambda, 0, finalLambda.length);
}
// run MIRA for one epoch
public double[] runOptimizer() {
- List<Integer> sents = new ArrayList<Integer>();
+ List<Integer> sents = new ArrayList<>();
for (int i = 0; i < sentNum; ++i)
sents.add(i);
double[] avgLambda = new double[initialLambda.length]; // only needed if averaging is required
@@ -90,7 +89,7 @@ public class Optimizer {
loss = 0;
thisBatchSize = batchSize;
++numBatch;
- HashMap<Integer, Double> featDiff = new HashMap<Integer, Double>();
+ HashMap<Integer, Double> featDiff = new HashMap<>();
for(int b = 0; b < batchSize; ++b ) {
//find out oracle and prediction
s = sents.get(sentCount);
@@ -120,51 +119,48 @@ public class Optimizer {
//accumulate difference feature vector
if ( b == 0 ) {
- for (int i = 0; i < vecOraFeat.length; i++) {
- featInfo = vecOraFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
- }
- for (int i = 0; i < vecPredFeat.length; i++) {
- featInfo = vecPredFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- if (featDiff.containsKey(diffFeatId)) { //overlapping features
- diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
- if ( Math.abs(diff) > 1e-20 )
- featDiff.put(diffFeatId, diff);
- else
- featDiff.remove(diffFeatId);
+ for (String aVecOraFeat : vecOraFeat) {
+ featInfo = aVecOraFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
+ }
+ for (String aVecPredFeat : vecPredFeat) {
+ featInfo = aVecPredFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId) - Double.parseDouble(featInfo[1]);
+ if (Math.abs(diff) > 1e-20)
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ } else //features only firing in the 2nd feature vector
+ featDiff.put(diffFeatId, -1.0 * Double.parseDouble(featInfo[1]));
}
- else //features only firing in the 2nd feature vector
- featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
- }
} else {
- for (int i = 0; i < vecOraFeat.length; i++) {
- featInfo = vecOraFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- if (featDiff.containsKey(diffFeatId)) { //overlapping features
- diff = featDiff.get(diffFeatId)+Double.parseDouble(featInfo[1]);
- if ( Math.abs(diff) > 1e-20 )
- featDiff.put(diffFeatId, diff);
- else
- featDiff.remove(diffFeatId);
+ for (String aVecOraFeat : vecOraFeat) {
+ featInfo = aVecOraFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId) + Double.parseDouble(featInfo[1]);
+ if (Math.abs(diff) > 1e-20)
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ } else //features only firing in the new oracle feature vector
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
}
- else //features only firing in the new oracle feature vector
- featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
- }
- for (int i = 0; i < vecPredFeat.length; i++) {
- featInfo = vecPredFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- if (featDiff.containsKey(diffFeatId)) { //overlapping features
- diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
- if ( Math.abs(diff) > 1e-20 )
- featDiff.put(diffFeatId, diff);
- else
- featDiff.remove(diffFeatId);
+ for (String aVecPredFeat : vecPredFeat) {
+ featInfo = aVecPredFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId) - Double.parseDouble(featInfo[1]);
+ if (Math.abs(diff) > 1e-20)
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ } else //features only firing in the new prediction feature vector
+ featDiff.put(diffFeatId, -1.0 * Double.parseDouble(featInfo[1]));
}
- else //features only firing in the new prediction feature vector
- featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
- }
}
if (!runPercep) { // otherwise eta=1.0
// remember the model scores here are already scaled
@@ -183,10 +179,10 @@ public class Optimizer {
if (!runPercep) { // otherwise eta=1.0
featNorm = 0;
Collection<Double> allDiff = featDiff.values();
- for (Iterator<Double> it = allDiff.iterator(); it.hasNext();) {
- diff = it.next();
- featNorm += diff * diff / ( thisBatchSize * thisBatchSize );
- }
+ for (Double anAllDiff : allDiff) {
+ diff = anAllDiff;
+ featNorm += diff * diff / (thisBatchSize * thisBatchSize);
+ }
}
if( loss <= 0 )
eta = 0;
@@ -269,7 +265,7 @@ public class Optimizer {
finalMetricScore = bestMetricScore;
// non-optimizable weights should remain unchanged
- ArrayList<Double> cpFixWt = new ArrayList<Double>();
+ ArrayList<Double> cpFixWt = new ArrayList<>();
for (int i = 1; i < isOptimizable.length; ++i) {
if (!isOptimizable[i])
cpFixWt.add(finalLambda[i]);
@@ -302,21 +298,22 @@ public class Optimizer {
// find out the 1-best candidate for each sentence
// this depends on the training mode
maxModelScore = NegInf;
- for (Iterator<String> it = candSet.iterator(); it.hasNext();) {
- modelScore = 0.0;
- candStr = it.next().toString();
- feat_str = feat_hash[i].get(candStr).split("\\s+");
- String[] feat_info;
- for (int f = 0; f < feat_str.length; f++) {
- feat_info = feat_str[f].split("=");
- modelScore += Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
- }
- if (maxModelScore < modelScore) {
- maxModelScore = modelScore;
- tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
- // suff stats
+ for (String aCandSet : candSet) {
+ modelScore = 0.0;
+ candStr = aCandSet;
+ feat_str = feat_hash[i].get(candStr).split("\\s+");
+ String[] feat_info;
+ for (String aFeat_str : feat_str) {
+ feat_info = aFeat_str.split("=");
+ modelScore +=
+ Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
+ }
+ if (maxModelScore < modelScore) {
+ maxModelScore = modelScore;
+ tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
+ // suff stats
+ }
}
- }
for (int j = 0; j < suffStatsCount; j++)
corpusStatsVal[j] += Integer.parseInt(tmpStatsVal[j]); // accumulate
@@ -361,107 +358,107 @@ public class Optimizer {
worstPredScore = PosInf;
}
- for (Iterator<String> it = candSet.iterator(); it.hasNext();) {
- cand = it.next().toString();
- candMetric = computeSentMetric(sentId, cand); // compute metric score
-
- // start to compute model score
- candScore = 0;
- featStr = feat_hash[sentId].get(cand).split("\\s+");
- feats = "";
-
- for (int i = 0; i < featStr.length; i++) {
- featInfo = featStr[i].split("=");
- actualFeatId = Vocabulary.id(featInfo[0]);
- candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
- if ((actualFeatId < isOptimizable.length && isOptimizable[actualFeatId])
- || actualFeatId >= isOptimizable.length)
- feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
- }
-
- candScore *= featScale; // scale the model score
-
- // is this cand oracle?
- if (oraSelectMode == 1) {// "hope", b=1, r=1
- if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
- if (bestOraScore <= (candScore - candMetric)) {
- bestOraScore = candScore - candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- } else {
- if (bestOraScore <= (candScore + candMetric)) {
- bestOraScore = candScore + candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
+ for (String aCandSet : candSet) {
+ cand = aCandSet;
+ candMetric = computeSentMetric(sentId, cand); // compute metric score
+
+ // start to compute model score
+ candScore = 0;
+ featStr = feat_hash[sentId].get(cand).split("\\s+");
+ feats = "";
+
+ for (String aFeatStr : featStr) {
+ featInfo = aFeatStr.split("=");
+ actualFeatId = Vocabulary.id(featInfo[0]);
+ candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
+ if ((actualFeatId < isOptimizable.length && isOptimizable[actualFeatId])
+ || actualFeatId >= isOptimizable.length)
+ feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
}
- }
- } else {// best metric score(ex: max BLEU), b=1, r=0
- if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
- if (bestOraScore >= candMetric) {
- bestOraScore = candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- } else {
- if (bestOraScore <= candMetric) {
- bestOraScore = candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- }
- // is this cand prediction?
- if (predSelectMode == 1) {// "fear"
- if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
- if (worstPredScore <= (candScore + candMetric)) {
- worstPredScore = candScore + candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- } else {
- if (worstPredScore <= (candScore - candMetric)) {
- worstPredScore = candScore - candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
+ candScore *= featScale; // scale the model score
+
+ // is this cand oracle?
+ if (oraSelectMode == 1) {// "hope", b=1, r=1
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (bestOraScore <= (candScore - candMetric)) {
+ bestOraScore = candScore - candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ } else {
+ if (bestOraScore <= (candScore + candMetric)) {
+ bestOraScore = candScore + candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ } else {// best metric score(ex: max BLEU), b=1, r=0
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (bestOraScore >= candMetric) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ } else {
+ if (bestOraScore <= candMetric) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
}
- }
- } else if (predSelectMode == 2) {// model prediction(max model score)
- if (worstPredScore <= candScore) {
- worstPredScore = candScore;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- } else {// worst metric score(ex: min BLEU)
- if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
- if (worstPredScore <= candMetric) {
- worstPredScore = candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- } else {
- if (worstPredScore >= candMetric) {
- worstPredScore = candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
+
+ // is this cand prediction?
+ if (predSelectMode == 1) {// "fear"
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (worstPredScore <= (candScore + candMetric)) {
+ worstPredScore = candScore + candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {
+ if (worstPredScore <= (candScore - candMetric)) {
+ worstPredScore = candScore - candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ } else if (predSelectMode == 2) {// model prediction(max model score)
+ if (worstPredScore <= candScore) {
+ worstPredScore = candScore;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {// worst metric score(ex: min BLEU)
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (worstPredScore <= candMetric) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {
+ if (worstPredScore >= candMetric) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
}
- }
}
- }
oraPredScore[0] = oraMetric;
oraPredScore[1] = oraScore;
@@ -608,14 +605,14 @@ public class Optimizer {
return finalMetricScore;
}
- private Vector<String> output;
+ private final Vector<String> output;
private double[] initialLambda;
- private double[] finalLambda;
+ private final double[] finalLambda;
private double finalMetricScore;
- private HashMap<String, String>[] feat_hash;
- private HashMap<String, String>[] stats_hash;
- private int paramDim;
- private boolean[] isOptimizable;
+ private final HashMap<String, String>[] feat_hash;
+ private final HashMap<String, String>[] stats_hash;
+ private final int paramDim;
+ private final boolean[] isOptimizable;
public static int sentNum;
public static int miraIter; // MIRA internal iterations
public static int oraSelectMode;
@@ -636,6 +633,6 @@ public class Optimizer {
public static double[] normalizationOptions;
public static double[][] bleuHistory;
- private final static double NegInf = (-1.0 / 0.0);
- private final static double PosInf = (+1.0 / 0.0);
+ private final static double NegInf = Double.NEGATIVE_INFINITY;
+ private final static double PosInf = Double.POSITIVE_INFINITY;
}
[4/7] incubator-joshua git commit: static analysis based code
improvements on oracle package
Posted by to...@apache.org.
static analysis based code improvements on oracle package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/13f258d3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/13f258d3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/13f258d3
Branch: refs/heads/master
Commit: 13f258d321ffa159f1c0e655edf9f5fa0529e1f4
Parents: 0fc2f49
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 13:55:05 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 13:55:05 2016 +0200
----------------------------------------------------------------------
.../joshua/oracle/OracleExtractionHG.java | 127 +++++++++----------
.../java/org/apache/joshua/oracle/SplitHg.java | 37 +++---
2 files changed, 80 insertions(+), 84 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/13f258d3/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java b/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
index 575515a..017a296 100644
--- a/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
+++ b/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
@@ -46,37 +46,37 @@ import org.apache.joshua.util.FormatUtils;
* @author Zhifei Li, zhifei.work@gmail.com (Johns Hopkins University)
*/
public class OracleExtractionHG extends SplitHg {
- static String BACKOFF_LEFT_LM_STATE_SYM = "<lzfbo>";
- public int BACKOFF_LEFT_LM_STATE_SYM_ID;// used for equivelant state
+ static final String BACKOFF_LEFT_LM_STATE_SYM = "<lzfbo>";
+ public final int BACKOFF_LEFT_LM_STATE_SYM_ID;// used for equivelant state
static String NULL_LEFT_LM_STATE_SYM = "<lzflnull>";
- public int NULL_LEFT_LM_STATE_SYM_ID;// used for equivelant state
+ public final int NULL_LEFT_LM_STATE_SYM_ID;// used for equivelant state
- static String NULL_RIGHT_LM_STATE_SYM = "<lzfrnull>";
- public int NULL_RIGHT_LM_STATE_SYM_ID;// used for equivelant state
+ static final String NULL_RIGHT_LM_STATE_SYM = "<lzfrnull>";
+ public final int NULL_RIGHT_LM_STATE_SYM_ID;// used for equivelant state
// int[] ref_sentence;//reference string (not tree)
protected int src_sent_len = 0;
protected int ref_sent_len = 0;
protected int g_lm_order = 4; // only used for decide whether to get the LM state by this class or
// not in compute_state
- static protected boolean do_local_ngram_clip = false;
- static protected boolean maitain_length_state = false;
- static protected int g_bleu_order = 4;
+ static protected final boolean do_local_ngram_clip = false;
+ static protected final boolean maitain_length_state = false;
+ static protected final int g_bleu_order = 4;
- static boolean using_left_equiv_state = true;
- static boolean using_right_equiv_state = true;
+ static final boolean using_left_equiv_state = true;
+ static final boolean using_right_equiv_state = true;
// TODO Add generics to hash tables in this class
- HashMap<String, Boolean> tbl_suffix = new HashMap<String, Boolean>();
- HashMap<String, Boolean> tbl_prefix = new HashMap<String, Boolean>();
- static PrefixGrammar grammar_prefix = new PrefixGrammar();// TODO
- static PrefixGrammar grammar_suffix = new PrefixGrammar();// TODO
+ final HashMap<String, Boolean> tbl_suffix = new HashMap<>();
+ final HashMap<String, Boolean> tbl_prefix = new HashMap<>();
+ static final PrefixGrammar grammar_prefix = new PrefixGrammar();// TODO
+ static final PrefixGrammar grammar_suffix = new PrefixGrammar();// TODO
// key: item; value: best_deduction, best_bleu, best_len, # of n-gram match where n is in [1,4]
- protected HashMap<String, Integer> tbl_ref_ngrams = new HashMap<String, Integer>();
+ protected final HashMap<String, Integer> tbl_ref_ngrams = new HashMap<>();
- static boolean always_maintain_seperate_lm_state = true; // if true: the virtual item maintain its
+ static final boolean always_maintain_seperate_lm_state = true; // if true: the virtual item maintain its
// own lm state regardless whether
// lm_order>=g_bleu_order
@@ -114,8 +114,8 @@ public class OracleExtractionHG extends SplitHg {
System.out
.println("Usage: java Decoder f_hypergraphs f_rule_tbl f_ref_files f_orc_out lm_order orc_extract_nbest");
System.out.println("num of args is " + args.length);
- for (int i = 0; i < args.length; i++) {
- System.out.println("arg is: " + args[i]);
+ for (String arg : args) {
+ System.out.println("arg is: " + arg);
}
System.exit(1);
}
@@ -166,8 +166,8 @@ public class OracleExtractionHG extends SplitHg {
time_on_reading += System.currentTimeMillis() - start_time;
start_time = System.currentTimeMillis();
- String orc_sent = null;
- double orc_bleu = 0;
+ String orc_sent;
+ double orc_bleu;
if (orc_extract_nbest) {
Object[] res = orc_extractor.oracle_extract_nbest(kbest_extractor, hg, topN,
do_ngram_clip_nbest, ref_sent);
@@ -286,10 +286,10 @@ public class OracleExtractionHG extends SplitHg {
// DPState maintain all the state information at an item that is required during dynamic
// programming
protected static class DPStateOracle extends DPState {
- int best_len; // this may not be used in the signature
- int[] ngram_matches;
- int[] left_lm_state;
- int[] right_lm_state;
+ final int best_len; // this may not be used in the signature
+ final int[] ngram_matches;
+ final int[] left_lm_state;
+ final int[] right_lm_state;
public DPStateOracle(int blen, int[] matches, int[] left, int[] right) {
best_len = blen;
@@ -299,22 +299,22 @@ public class OracleExtractionHG extends SplitHg {
}
protected String get_signature() {
- StringBuffer res = new StringBuffer();
+ StringBuilder res = new StringBuilder();
if (maitain_length_state) {
res.append(best_len);
res.append(' ');
}
if (null != left_lm_state) { // goal-item have null state
- for (int i = 0; i < left_lm_state.length; i++) {
- res.append(left_lm_state[i]);
+ for (int aLeft_lm_state : left_lm_state) {
+ res.append(aLeft_lm_state);
res.append(' ');
}
}
res.append("lzf ");
if (null != right_lm_state) { // goal-item have null state
- for (int i = 0; i < right_lm_state.length; i++) {
- res.append(right_lm_state[i]);
+ for (int aRight_lm_state : right_lm_state) {
+ res.append(aRight_lm_state);
res.append(' ');
}
}
@@ -324,12 +324,12 @@ public class OracleExtractionHG extends SplitHg {
}
protected void print() {
- StringBuffer res = new StringBuffer();
+ StringBuilder res = new StringBuilder();
res.append("DPstate: best_len: ");
res.append(best_len);
- for (int i = 0; i < ngram_matches.length; i++) {
+ for (int ngram_matche : ngram_matches) {
res.append("; ngram: ");
- res.append(ngram_matches[i]);
+ res.append(ngram_matche);
}
System.out.println(res.toString());
}
@@ -352,10 +352,10 @@ public class OracleExtractionHG extends SplitHg {
}
// ################## deductions *not* under "goal item"
- HashMap<String, Integer> new_ngram_counts = new HashMap<String, Integer>();// new ngrams created
+ HashMap<String, Integer> new_ngram_counts = new HashMap<>();// new ngrams created
// due to the
// combination
- HashMap<String, Integer> old_ngram_counts = new HashMap<String, Integer>();// the ngram that has
+ HashMap<String, Integer> old_ngram_counts = new HashMap<>();// the ngram that has
// already been
// computed
int total_hyp_len = 0;
@@ -364,7 +364,7 @@ public class OracleExtractionHG extends SplitHg {
// ####calulate new and old ngram counts, and len
- ArrayList<Integer> words = new ArrayList<Integer>();
+ ArrayList<Integer> words = new ArrayList<>();
// used for compute left- and right- lm state
ArrayList<Integer> left_state_sequence = null;
@@ -373,15 +373,14 @@ public class OracleExtractionHG extends SplitHg {
int correct_lm_order = lm_order;
if (always_maintain_seperate_lm_state || lm_order < g_bleu_order) {
- left_state_sequence = new ArrayList<Integer>();
- right_state_sequence = new ArrayList<Integer>();
+ left_state_sequence = new ArrayList<>();
+ right_state_sequence = new ArrayList<>();
correct_lm_order = g_bleu_order; // if lm_order is smaller than g_bleu_order, we will get the
// lm state by ourself
}
// #### get left_state_sequence, right_state_sequence, total_hyp_len, num_ngram_match
- for (int c = 0; c < en_words.length; c++) {
- int c_id = en_words[c];
+ for (int c_id : en_words) {
if (FormatUtils.isNonterminal(c_id)) {
int index = -(c_id + 1);
DPStateOracle ant_state = (DPStateOracle) l_ant_virtual_item.get(index).dp_state;
@@ -430,9 +429,9 @@ public class OracleExtractionHG extends SplitHg {
// ####now deduct ngram counts
for (String ngram : new_ngram_counts.keySet()) {
if (tbl_ref_ngrams.containsKey(ngram)) {
- int final_count = (Integer) new_ngram_counts.get(ngram);
+ int final_count = new_ngram_counts.get(ngram);
if (old_ngram_counts.containsKey(ngram)) {
- final_count -= (Integer) old_ngram_counts.get(ngram);
+ final_count -= old_ngram_counts.get(ngram);
// BUG: Whoa, is that an actual hard-coded ID in there? :)
if (final_count < 0) {
throw new RuntimeException("negative count for ngram: " + Vocabulary.word(11844)
@@ -443,7 +442,7 @@ public class OracleExtractionHG extends SplitHg {
if (do_local_ngram_clip) {
// BUG: use joshua.util.Regex.spaces.split(...)
num_ngram_match[ngram.split("\\s+").length - 1] += Support.findMin(final_count,
- (Integer) tbl_ref_ngrams.get(ngram));
+ tbl_ref_ngrams.get(ngram));
} else {
// BUG: use joshua.util.Regex.spaces.split(...)
num_ngram_match[ngram.split("\\s+").length - 1] += final_count; // do not do any cliping
@@ -453,8 +452,8 @@ public class OracleExtractionHG extends SplitHg {
}
// ####now calculate the BLEU score and state
- int[] left_lm_state = null;
- int[] right_lm_state = null;
+ int[] left_lm_state;
+ int[] right_lm_state;
left_lm_state = get_left_equiv_state(left_state_sequence, tbl_suffix);
right_lm_state = get_right_equiv_state(right_state_sequence, tbl_prefix);
@@ -500,16 +499,16 @@ public class OracleExtractionHG extends SplitHg {
private boolean is_a_suffix_in_tbl(ArrayList<Integer> left_state_sequence, int start_pos,
int end_pos, HashMap<String, Boolean> tbl_suffix) {
- if ((Integer) left_state_sequence.get(end_pos) == this.NULL_LEFT_LM_STATE_SYM_ID) {
+ if (left_state_sequence.get(end_pos) == this.NULL_LEFT_LM_STATE_SYM_ID) {
return false;
}
- StringBuffer suffix = new StringBuffer();
+ StringBuilder suffix = new StringBuilder();
for (int i = end_pos; i >= start_pos; i--) { // right-most first
suffix.append(left_state_sequence.get(i));
if (i > start_pos)
suffix.append(' ');
}
- return (Boolean) tbl_suffix.containsKey(suffix.toString());
+ return tbl_suffix.containsKey(suffix.toString());
}
private int[] get_right_equiv_state(ArrayList<Integer> right_state_sequence,
@@ -519,7 +518,7 @@ public class OracleExtractionHG extends SplitHg {
int[] right_lm_state = new int[r_size];
if (!using_right_equiv_state || r_size < g_bleu_order - 1) { // regular
for (int i = 0; i < r_size; i++) {
- right_lm_state[i] = (Integer) right_state_sequence.get(right_state_sequence.size() - r_size
+ right_lm_state[i] = right_state_sequence.get(right_state_sequence.size() - r_size
+ i);
}
} else {
@@ -529,7 +528,7 @@ public class OracleExtractionHG extends SplitHg {
// if(is_a_prefix_in_grammar(right_state_sequence, right_state_sequence.size()-r_size+i,
// right_state_sequence.size()-1, grammar_prefix)){
for (int j = i; j < r_size; j++) {
- right_lm_state[j] = (Integer) right_state_sequence.get(right_state_sequence.size()
+ right_lm_state[j] = right_state_sequence.get(right_state_sequence.size()
- r_size + j);
}
break;
@@ -548,13 +547,13 @@ public class OracleExtractionHG extends SplitHg {
if (right_state_sequence.get(start_pos) == this.NULL_RIGHT_LM_STATE_SYM_ID) {
return false;
}
- StringBuffer prefix = new StringBuffer();
+ StringBuilder prefix = new StringBuilder();
for (int i = start_pos; i <= end_pos; i++) {
prefix.append(right_state_sequence.get(i));
if (i < end_pos)
prefix.append(' ');
}
- return (Boolean) tbl_prefix.containsKey(prefix.toString());
+ return tbl_prefix.containsKey(prefix.toString());
}
public static void compare_two_int_arrays(int[] a, int[] b) {
@@ -574,7 +573,7 @@ public class OracleExtractionHG extends SplitHg {
if (hyp_len <= 0 || ref_len <= 0) {
throw new RuntimeException("ref or hyp is zero len");
}
- double res = 0;
+ double res;
double wt = 1.0 / bleu_order;
double prec = 0;
double smooth_factor = 1.0;
@@ -599,7 +598,7 @@ public class OracleExtractionHG extends SplitHg {
for (int i = 0; i < wrds.length; i++) {
for (int j = 0; j < order && j + i < wrds.length; j++) { // ngram: [i,i+j]
boolean contain_null = false;
- StringBuffer ngram = new StringBuffer();
+ StringBuilder ngram = new StringBuilder();
for (int k = i; k <= i + j; k++) {
if (wrds[k] == this.NULL_LEFT_LM_STATE_SYM_ID
|| wrds[k] == this.NULL_RIGHT_LM_STATE_SYM_ID) {
@@ -615,7 +614,7 @@ public class OracleExtractionHG extends SplitHg {
continue; // skip this ngram
String ngram_str = ngram.toString();
if (tbl.containsKey(ngram_str)) {
- tbl.put(ngram_str, (Integer) tbl.get(ngram_str) + 1);
+ tbl.put(ngram_str, tbl.get(ngram_str) + 1);
} else {
tbl.put(ngram_str, 1);
}
@@ -636,9 +635,9 @@ public class OracleExtractionHG extends SplitHg {
// ngram: [i,i+j]
for (int j = 0; j < order && j + i < wrds.size(); j++) {
boolean contain_null = false;
- StringBuffer ngram = new StringBuffer();
+ StringBuilder ngram = new StringBuilder();
for (int k = i; k <= i + j; k++) {
- int t_wrd = (Integer) wrds.get(k);
+ int t_wrd = wrds.get(k);
if (t_wrd == this.NULL_LEFT_LM_STATE_SYM_ID || t_wrd == this.NULL_RIGHT_LM_STATE_SYM_ID) {
contain_null = true;
if (ignore_null_equiv_symbol)
@@ -654,7 +653,7 @@ public class OracleExtractionHG extends SplitHg {
String ngram_str = ngram.toString();
if (tbl.containsKey(ngram_str)) {
- tbl.put(ngram_str, (Integer) tbl.get(ngram_str) + 1);
+ tbl.put(ngram_str, tbl.get(ngram_str) + 1);
} else {
tbl.put(ngram_str, 1);
}
@@ -673,11 +672,11 @@ public class OracleExtractionHG extends SplitHg {
public double compute_sentence_bleu(int[] ref_sent, int[] hyp_sent, boolean do_ngram_clip,
int bleu_order) {
- double res_bleu = 0;
+ double res_bleu;
int order = 4;
- HashMap<String, Integer> ref_ngram_tbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> ref_ngram_tbl = new HashMap<>();
get_ngrams(ref_ngram_tbl, order, ref_sent, false);
- HashMap<String, Integer> hyp_ngram_tbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> hyp_ngram_tbl = new HashMap<>();
get_ngrams(hyp_ngram_tbl, order, hyp_sent, false);
int[] num_ngram_match = new int[order];
@@ -686,10 +685,10 @@ public class OracleExtractionHG extends SplitHg {
if (do_ngram_clip) {
// BUG: use joshua.util.Regex.spaces.split(...)
num_ngram_match[ngram.split("\\s+").length - 1] += Support.findMin(
- (Integer) ref_ngram_tbl.get(ngram), (Integer) hyp_ngram_tbl.get(ngram)); // ngram clip
+ ref_ngram_tbl.get(ngram), hyp_ngram_tbl.get(ngram)); // ngram clip
} else {
// BUG: use joshua.util.Regex.spaces.split(...)
- num_ngram_match[ngram.split("\\s+").length - 1] += (Integer) hyp_ngram_tbl.get(ngram);// without
+ num_ngram_match[ngram.split("\\s+").length - 1] += hyp_ngram_tbl.get(ngram);// without
// ngram
// count
// clipping
@@ -754,9 +753,9 @@ public class OracleExtractionHG extends SplitHg {
private static class PrefixGrammarNode extends HashMap<Integer, PrefixGrammarNode> {
private static final long serialVersionUID = 1L;
- };
+ }
- PrefixGrammarNode root = new PrefixGrammarNode();
+ final PrefixGrammarNode root = new PrefixGrammarNode();
// add prefix information
public void add_ngram(int[] wrds, int start_pos, int end_pos) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/13f258d3/src/main/java/org/apache/joshua/oracle/SplitHg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/oracle/SplitHg.java b/src/main/java/org/apache/joshua/oracle/SplitHg.java
index 9fcdd35..8d37949 100644
--- a/src/main/java/org/apache/joshua/oracle/SplitHg.java
+++ b/src/main/java/org/apache/joshua/oracle/SplitHg.java
@@ -21,6 +21,7 @@ package org.apache.joshua.oracle;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.stream.Collectors;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.hypergraph.HyperEdge;
@@ -34,7 +35,7 @@ import org.apache.joshua.decoder.hypergraph.HyperGraph;
*/
public abstract class SplitHg {
- HashMap<HGNode, ArrayList<VirtualItem>> g_tbl_split_virtual_items = new HashMap<HGNode, ArrayList<VirtualItem>>();
+ final HashMap<HGNode, ArrayList<VirtualItem>> g_tbl_split_virtual_items = new HashMap<>();
// number of items or deductions after splitting the hypergraph
public int g_num_virtual_items = 0;
@@ -52,9 +53,8 @@ public abstract class SplitHg {
// g_tbl_split_virtual_items
public double get_best_goal_cost(HyperGraph hg,
HashMap<HGNode, ArrayList<VirtualItem>> g_tbl_split_virtual_items) {
- double res = get_virtual_goal_item(hg, g_tbl_split_virtual_items).best_virtual_deduction.best_cost;
// System.out.println("best bleu is " +res);
- return res;
+ return get_virtual_goal_item(hg, g_tbl_split_virtual_items).best_virtual_deduction.best_cost;
}
public VirtualItem get_virtual_goal_item(HyperGraph original_hg,
@@ -84,7 +84,7 @@ public abstract class SplitHg {
VirtualDeduction virtual_dt = virtual_it.best_virtual_deduction;
if (virtual_dt.l_ant_virtual_items != null)
for (int i = 0; i < virtual_dt.l_ant_virtual_items.size(); i++) {
- VirtualItem ant_it = (VirtualItem) virtual_dt.l_ant_virtual_items.get(i);
+ VirtualItem ant_it = virtual_dt.l_ant_virtual_items.get(i);
HGNode new_it = clone_item_with_best_deduction(ant_it);
onebest_item.bestHyperedge.getTailNodes().set(i, new_it);
get_1best_tree_item(ant_it, new_it);
@@ -94,7 +94,7 @@ public abstract class SplitHg {
// TODO: tbl_states
private static HGNode clone_item_with_best_deduction(VirtualItem virtual_it) {
HGNode original_it = virtual_it.p_item;
- ArrayList<HyperEdge> l_deductions = new ArrayList<HyperEdge>();
+ ArrayList<HyperEdge> l_deductions = new ArrayList<>();
HyperEdge clone_dt = clone_deduction(virtual_it.best_virtual_deduction);
l_deductions.add(clone_dt);
return new HGNode(original_it.i, original_it.j, original_it.lhs, l_deductions, clone_dt,
@@ -106,10 +106,9 @@ public abstract class SplitHg {
ArrayList<HGNode> l_ant_items = null;
// l_ant_items will be changed in get_1best_tree_item
if (original_dt.getTailNodes() != null)
- l_ant_items = new ArrayList<HGNode>(original_dt.getTailNodes());
- HyperEdge res = new HyperEdge(original_dt.getRule(), original_dt.getBestDerivationScore(),
+ l_ant_items = new ArrayList<>(original_dt.getTailNodes());
+ return new HyperEdge(original_dt.getRule(), original_dt.getBestDerivationScore(),
original_dt.getTransitionLogP(false), l_ant_items, original_dt.getSourcePath());
- return res;
}
// ############### split hg #####
@@ -125,7 +124,7 @@ public abstract class SplitHg {
private void split_item(HGNode it) {
if (g_tbl_split_virtual_items.containsKey(it))
return;// already processed
- HashMap<String, VirtualItem> virtual_item_sigs = new HashMap<String, VirtualItem>();
+ HashMap<String, VirtualItem> virtual_item_sigs = new HashMap<>();
// ### recursive call on each deduction
if (speed_up_item(it)) {
for (HyperEdge dt : it.hyperedges) {
@@ -134,9 +133,8 @@ public abstract class SplitHg {
}
// ### item-specific operation
// a list of items result by splitting me
- ArrayList<VirtualItem> l_virtual_items = new ArrayList<VirtualItem>();
- for (String signature : virtual_item_sigs.keySet())
- l_virtual_items.add(virtual_item_sigs.get(signature));
+ ArrayList<VirtualItem> l_virtual_items = virtual_item_sigs.keySet().stream()
+ .map(virtual_item_sigs::get).collect(Collectors.toCollection(ArrayList::new));
g_tbl_split_virtual_items.put(it, l_virtual_items);
g_num_virtual_items += l_virtual_items.size();
// if(virtual_item_sigs.size()!=1)System.out.println("num of split items is " +
@@ -146,7 +144,7 @@ public abstract class SplitHg {
private void split_deduction(HyperEdge cur_dt, HashMap<String, VirtualItem> virtual_item_sigs,
HGNode parent_item) {
- if (speed_up_deduction(cur_dt) == false)
+ if (!speed_up_deduction(cur_dt))
return;// no need to continue
// ### recursively split all my ant items, get a l_split_items for each original item
@@ -168,7 +166,7 @@ public abstract class SplitHg {
ArrayList<VirtualItem> l_virtual_items = g_tbl_split_virtual_items.get(it);
for (VirtualItem ant_virtual_item : l_virtual_items) {
// used in combination
- ArrayList<VirtualItem> l_ant_virtual_item = new ArrayList<VirtualItem>();
+ ArrayList<VirtualItem> l_ant_virtual_item = new ArrayList<>();
l_ant_virtual_item.add(ant_virtual_item);
process_one_combination_nonaxiom(parent_item, virtual_item_sigs, cur_dt,
l_ant_virtual_item);
@@ -182,7 +180,7 @@ public abstract class SplitHg {
for (VirtualItem virtual_it1 : l_virtual_items1) {
for (VirtualItem virtual_it2 : l_virtual_items2) {
// used in combination
- ArrayList<VirtualItem> l_ant_virtual_item = new ArrayList<VirtualItem>();
+ ArrayList<VirtualItem> l_ant_virtual_item = new ArrayList<>();
l_ant_virtual_item.add(virtual_it1);
l_ant_virtual_item.add(virtual_it2);
process_one_combination_nonaxiom(parent_item, virtual_item_sigs, cur_dt,
@@ -208,7 +206,7 @@ public abstract class SplitHg {
throw new RuntimeException("deduction is null");
}
String sig = VirtualItem.get_signature(parent_item, dpstate);
- VirtualItem t_virtual_item = (VirtualItem) virtual_item_sigs.get(sig);
+ VirtualItem t_virtual_item = virtual_item_sigs.get(sig);
if (t_virtual_item != null) {
t_virtual_item.add_deduction(t_ded, dpstate, maintain_onebest_only);
} else {
@@ -229,7 +227,7 @@ public abstract class SplitHg {
protected abstract static class DPState {
protected abstract String get_signature();
- };
+ }
/*
* In general, variables of items (1) list of hyperedges (2) best hyperedge (3) DP state (4)
@@ -250,10 +248,9 @@ public abstract class SplitHg {
}
public void add_deduction(VirtualDeduction fdt, DPState dstate, boolean maintain_onebest_only) {
- if (maintain_onebest_only == false) {
+ if (!maintain_onebest_only) {
if (l_virtual_deductions == null)
- l_virtual_deductions = new ArrayList<VirtualDeduction>();
- ;
+ l_virtual_deductions = new ArrayList<>();
l_virtual_deductions.add(fdt);
}
if (best_virtual_deduction == null || fdt.best_cost < best_virtual_deduction.best_cost) {
[5/7] incubator-joshua git commit: static analysis based code
improvements on pro package
Posted by to...@apache.org.
static analysis based code improvements on pro package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2c3c0ad2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2c3c0ad2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2c3c0ad2
Branch: refs/heads/master
Commit: 2c3c0ad2c474e8cfdc632b160287d4d2d8e90f32
Parents: 13f258d
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 13:59:57 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 13:59:57 2016 +0200
----------------------------------------------------------------------
.../apache/joshua/pro/ClassifierPerceptron.java | 4 +-
.../java/org/apache/joshua/pro/Optimizer.java | 134 +++++----
.../java/org/apache/joshua/pro/PROCore.java | 278 +++++++++++--------
3 files changed, 225 insertions(+), 191 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2c3c0ad2/src/main/java/org/apache/joshua/pro/ClassifierPerceptron.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/pro/ClassifierPerceptron.java b/src/main/java/org/apache/joshua/pro/ClassifierPerceptron.java
index 1b5d75c..a0f0f6f 100755
--- a/src/main/java/org/apache/joshua/pro/ClassifierPerceptron.java
+++ b/src/main/java/org/apache/joshua/pro/ClassifierPerceptron.java
@@ -54,8 +54,8 @@ public class ClassifierPerceptron implements ClassifierInterface {
numError = 0;
// numPosSamp = 0;
- for (int s = 0; s < sampleSize; s++) {
- featVal = samples.get(s).split("\\s+");
+ for (String sample : samples) {
+ featVal = sample.split("\\s+");
// only consider positive samples
// if( featVal[featDim].equals("1") )
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2c3c0ad2/src/main/java/org/apache/joshua/pro/Optimizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/pro/Optimizer.java b/src/main/java/org/apache/joshua/pro/Optimizer.java
index ad80305..a1f8a90 100755
--- a/src/main/java/org/apache/joshua/pro/Optimizer.java
+++ b/src/main/java/org/apache/joshua/pro/Optimizer.java
@@ -67,7 +67,7 @@ public class Optimizer {
// set classifier parameters
myClassifier.setClassifierParam(classifierParam);
//run classifier
- finalLambda = myClassifier.runClassifier(allSamples, initialLambda, paramDim);
+ double[] finalLambda = myClassifier.runClassifier(allSamples, initialLambda, paramDim);
normalizeLambda(finalLambda);
//parameters that are not optimizable are assigned with initial values
for ( int i = 1; i < isOptimizable.length; ++i ) {
@@ -121,17 +121,16 @@ public class Optimizer {
// find out the 1-best candidate for each sentence
maxModelScore = NegInf;
- for (Iterator<String> it = candSet.iterator(); it.hasNext();) {
+ for (String aCandSet : candSet) {
modelScore = 0.0;
- candStr = it.next().toString();
+ candStr = aCandSet;
feat_str = feat_hash[i].get(candStr).split("\\s+");
- for (int f = 0; f < feat_str.length; f++) {
- String[] feat_info = feat_str[f].split("[=]");
- modelScore +=
- Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
- }
+ for (String aFeat_str : feat_str) {
+ String[] feat_info = aFeat_str.split("[=]");
+ modelScore += Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
+ }
if (maxModelScore < modelScore) {
maxModelScore = modelScore;
@@ -147,10 +146,10 @@ public class Optimizer {
}
public Vector<String> process_Params() {
- Vector<String> allSamples = new Vector<String>(); // to save all sampled pairs
+ Vector<String> allSamples = new Vector<>(); // to save all sampled pairs
// sampling
- Vector<String> sampleVec = new Vector<String>(); // use String to make sparse representation
+ Vector<String> sampleVec = new Vector<>(); // use String to make sparse representation
// easy
for (int i = 0; i < sentNum; i++) {
sampleVec = Sampler(i);
@@ -162,18 +161,18 @@ public class Optimizer {
private Vector<String> Sampler(int sentId) {
int candCount = stats_hash[sentId].size();
- Vector<String> sampleVec = new Vector<String>();
- HashMap<String, Double> candScore = new HashMap<String, Double>(); // metric(e.g BLEU) score of
+ Vector<String> sampleVec = new Vector<>();
+ HashMap<String, Double> candScore = new HashMap<>(); // metric(e.g BLEU) score of
// all candidates
// extract all candidates to a string array to save time in computing BLEU score
String[] cands = new String[candCount];
Set<String> candSet = stats_hash[sentId].keySet();
- HashMap<Integer, String> candMap = new HashMap<Integer, String>();
+ HashMap<Integer, String> candMap = new HashMap<>();
int candId = 0;
- for (Iterator<String> it = candSet.iterator(); it.hasNext();) {
- cands[candId] = it.next().toString();
+ for (String aCandSet : candSet) {
+ cands[candId] = aCandSet;
candMap.put(candId, cands[candId]); // map an integer to each candidate
candId++;
}
@@ -183,7 +182,7 @@ public class Optimizer {
double scoreDiff;
double probAccept;
boolean accept;
- HashMap<String, Double> acceptedPair = new HashMap<String, Double>();
+ HashMap<String, Double> acceptedPair = new HashMap<>();
if (Tau < candCount * (candCount - 1)) // otherwise no need to sample
{
@@ -204,7 +203,7 @@ public class Optimizer {
// System.err.println("Diff: " + scoreDiff + " = " + candScore.get(candMap.get(j1)) + " - "
// + candScore.get(candMap.get(j2)));
- accept = randgen.nextDouble() <= probAccept ? true : false;
+ accept = randgen.nextDouble() <= probAccept;
if (accept) acceptedPair.put(j1 + " " + j2, scoreDiff);
}
@@ -216,7 +215,7 @@ public class Optimizer {
scoreDiff = Math.abs(candScore.get(candMap.get(i)) - candScore.get(candMap.get(j)));
probAccept = Alpha(scoreDiff);
- accept = randgen.nextDouble() <= probAccept ? true : false;
+ accept = randgen.nextDouble() <= probAccept;
if (accept) acceptedPair.put(i + " " + j, scoreDiff);
}
@@ -229,7 +228,7 @@ public class Optimizer {
// sort sampled pairs according to "scoreDiff"
ValueComparator comp = new ValueComparator(acceptedPair);
- TreeMap<String, Double> acceptedPairSort = new TreeMap<String, Double>(comp);
+ TreeMap<String, Double> acceptedPairSort = new TreeMap<>(comp);
acceptedPairSort.putAll(acceptedPair);
int topCount = 0;
@@ -238,7 +237,7 @@ public class Optimizer {
String[] feat_str_j1, feat_str_j2;
String j1Cand, j2Cand;
String featDiff, neg_featDiff;
- HashSet<String> added = new HashSet<String>(); // to avoid symmetric duplicate
+ HashSet<String> added = new HashSet<>(); // to avoid symmetric duplicate
for (String key : acceptedPairSort.keySet()) {
if (topCount == Xi) break;
@@ -262,31 +261,29 @@ public class Optimizer {
featDiff = "";
neg_featDiff = "";
- HashMap<Integer, String> feat_diff = new HashMap<Integer, String>();
+ HashMap<Integer, String> feat_diff = new HashMap<>();
String[] feat_info;
int feat_id;
- for (int i = 0; i < feat_str_j1.length; i++) {
- feat_info = feat_str_j1[i].split("[=]");
- feat_id = Vocabulary.id(feat_info[0]);
- if ( (feat_id < isOptimizable.length &&
- isOptimizable[feat_id]) ||
- feat_id >= isOptimizable.length )
- feat_diff.put( feat_id, feat_info[1] );
+ for (String aFeat_str_j1 : feat_str_j1) {
+ feat_info = aFeat_str_j1.split("[=]");
+ feat_id = Vocabulary.id(feat_info[0]);
+ if ((feat_id < isOptimizable.length && isOptimizable[feat_id])
+ || feat_id >= isOptimizable.length)
+ feat_diff.put(feat_id, feat_info[1]);
+ }
+ for (String aFeat_str_j2 : feat_str_j2) {
+ feat_info = aFeat_str_j2.split("[=]");
+ feat_id = Vocabulary.id(feat_info[0]);
+ if ((feat_id < isOptimizable.length && isOptimizable[feat_id])
+ || feat_id >= isOptimizable.length) {
+ if (feat_diff.containsKey(feat_id))
+ feat_diff.put(feat_id, Double.toString(
+ Double.parseDouble(feat_diff.get(feat_id)) - Double.parseDouble(feat_info[1])));
+ else //only fired in the cand 2
+ feat_diff.put(feat_id, Double.toString(-1.0 * Double.parseDouble(feat_info[1])));
+ }
}
- for (int i = 0; i < feat_str_j2.length; i++) {
- feat_info = feat_str_j2[i].split("[=]");
- feat_id = Vocabulary.id(feat_info[0]);
- if ( (feat_id < isOptimizable.length &&
- isOptimizable[feat_id]) ||
- feat_id >= isOptimizable.length ) {
- if (feat_diff.containsKey(feat_id))
- feat_diff.put( feat_id,
- Double.toString(Double.parseDouble(feat_diff.get(feat_id))-Double.parseDouble(feat_info[1])) );
- else //only fired in the cand 2
- feat_diff.put( feat_id, Double.toString(-1.0*Double.parseDouble(feat_info[1])));
- }
- }
for (Integer id: feat_diff.keySet()) {
featDiff += id + ":" + feat_diff.get(id) + " ";
@@ -325,22 +322,22 @@ public class Optimizer {
// compute *sentence-level* metric score
private HashMap<String, Double> compute_Score(int sentId, String[] cands) {
- HashMap<String, Double> candScore = new HashMap<String, Double>();
+ HashMap<String, Double> candScore = new HashMap<>();
String statString;
String[] statVal_str;
int[] statVal = new int[evalMetric.get_suffStatsCount()];
// for all candidates
- for (int i = 0; i < cands.length; i++) {
- statString = stats_hash[sentId].get(cands[i]);
+ for (String cand : cands) {
+ statString = stats_hash[sentId].get(cand);
statVal_str = statString.split("\\s+");
for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
statVal[j] = Integer.parseInt(statVal_str[j]);
-// System.err.println("Score: " + evalMetric.score(statVal));
-
- candScore.put(cands[i], evalMetric.score(statVal));
+ // System.err.println("Score: " + evalMetric.score(statVal));
+
+ candScore.put(cand, evalMetric.score(statVal));
}
return candScore;
@@ -412,31 +409,30 @@ public class Optimizer {
return finalMetricScore;
}
- private EvaluationMetric evalMetric;
- private Vector<String> output;
- private boolean[] isOptimizable;
- private double[] initialLambda;
- private double[] finalLambda;
- private double[] normalizationOptions;
+ private final EvaluationMetric evalMetric;
+ private final Vector<String> output;
+ private final boolean[] isOptimizable;
+ private final double[] initialLambda;
+ private final double[] normalizationOptions;
private double finalMetricScore;
- private HashMap<String, String>[] feat_hash;
- private HashMap<String, String>[] stats_hash;
- private Random randgen;
- private int paramDim;
- private int sentNum;
- private int Tau; // size of sampled candidate set(say 5000)
- private int Xi; // choose top Xi candidates from sampled set(say 50)
- private double metricDiff; // metric difference threshold(to select the qualified candidates)
- private String classifierAlg; // optimization algorithm
- private String[] classifierParam;
-
- private final static double NegInf = (-1.0 / 0.0);
- private final static double PosInf = (+1.0 / 0.0);
+ private final HashMap<String, String>[] feat_hash;
+ private final HashMap<String, String>[] stats_hash;
+ private final Random randgen;
+ private final int paramDim;
+ private final int sentNum;
+ private final int Tau; // size of sampled candidate set(say 5000)
+ private final int Xi; // choose top Xi candidates from sampled set(say 50)
+ private final double metricDiff; // metric difference threshold(to select the qualified candidates)
+ private final String classifierAlg; // optimization algorithm
+ private final String[] classifierParam;
+
+ private final static double NegInf = Double.NEGATIVE_INFINITY;
+ private final static double PosInf = Double.POSITIVE_INFINITY;
}
class ValueComparator implements Comparator<Object> {
- Map<String,Double> base;
+ final Map<String,Double> base;
public ValueComparator(Map<String,Double> base) {
this.base = base;
@@ -444,9 +440,9 @@ class ValueComparator implements Comparator<Object> {
@Override
public int compare(Object a, Object b) {
- if ((Double) base.get(a) <= (Double) base.get(b))
+ if (base.get(a) <= base.get(b))
return 1;
- else if ((Double) base.get(a) == (Double) base.get(b))
+ else if (base.get(a) == base.get(b))
return 0;
else
return -1;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2c3c0ad2/src/main/java/org/apache/joshua/pro/PROCore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/pro/PROCore.java b/src/main/java/org/apache/joshua/pro/PROCore.java
index 5dc3311..b7c454d 100755
--- a/src/main/java/org/apache/joshua/pro/PROCore.java
+++ b/src/main/java/org/apache/joshua/pro/PROCore.java
@@ -34,6 +34,7 @@ import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -63,12 +64,11 @@ public class PROCore {
private static final Logger LOG = LoggerFactory.getLogger(PROCore.class);
private final JoshuaConfiguration joshuaConfiguration;
- private TreeSet<Integer>[] indicesOfInterest_all;
private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
- private final static double NegInf = (-1.0 / 0.0);
- private final static double PosInf = (+1.0 / 0.0);
+ private final static double NegInf = Double.NEGATIVE_INFINITY;
+ private final static double PosInf = Double.POSITIVE_INFINITY;
private final static double epsilon = 1.0 / 1000000;
private int verbosity; // anything of priority <= verbosity will be printed
@@ -84,7 +84,6 @@ public class PROCore {
// number of documents in the dev set
// this should be 1, unless doing doc-level optimization
- private int[] docOfSentence;
// docOfSentence[i] stores which document contains the i'th sentence.
// docOfSentence is 0-indexed, as are the documents (i.e. first doc is indexed 0)
@@ -130,9 +129,9 @@ public class PROCore {
/* *********************************************************** */
// private double[] lambda;
- private ArrayList<Double> lambda = new ArrayList<Double>();
+ private ArrayList<Double> lambda = new ArrayList<>();
// the current weight vector. NOTE: indexing starts at 1.
- private ArrayList<Double> bestLambda = new ArrayList<Double>();
+ private final ArrayList<Double> bestLambda = new ArrayList<>();
// the best weight vector across all iterations
private boolean[] isOptimizable;
@@ -154,7 +153,6 @@ public class PROCore {
private Decoder myDecoder;
// COMMENT OUT if decoder is not Joshua
- private String decoderCommand;
// the command that runs the decoder; read from decoderCommandFileName
private int decVerbosity;
@@ -164,7 +162,6 @@ public class PROCore {
private int validDecoderExitValue;
// return value from running the decoder command that indicates success
- private int numOptThreads;
// number of threads to run things in parallel
private int saveInterFiles;
@@ -237,9 +234,9 @@ public class PROCore {
// when returnBest = true
private boolean returnBest = false; // return the best weight during tuning
- private String dirPrefix; // where are all these files located?
private String paramsFileName, docInfoFileName, finalLambdaFileName;
- private String sourceFileName, refFileName, decoderOutFileName;
+ private String refFileName;
+ private String decoderOutFileName;
private String decoderConfigFileName, decoderCommandFileName;
private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
@@ -253,21 +250,21 @@ public class PROCore {
this.joshuaConfiguration = joshuaConfiguration;
}
- public PROCore(String[] args, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
+ public PROCore(String[] args, JoshuaConfiguration joshuaConfiguration) throws IOException {
this.joshuaConfiguration = joshuaConfiguration;
EvaluationMetric.set_knownMetrics();
processArgsArray(args);
initialize(0);
}
- public PROCore(String configFileName, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
+ public PROCore(String configFileName, JoshuaConfiguration joshuaConfiguration) throws IOException {
this.joshuaConfiguration = joshuaConfiguration;
EvaluationMetric.set_knownMetrics();
processArgsArray(cfgFileToArgsArray(configFileName));
initialize(0);
}
- private void initialize(int randsToSkip) throws FileNotFoundException, IOException {
+ private void initialize(int randsToSkip) throws IOException {
println("NegInf: " + NegInf + ", PosInf: " + PosInf + ", epsilon: " + epsilon, 4);
randGen = new Random(seed);
@@ -329,8 +326,8 @@ public class PROCore {
// and one line for the normalization method
// indexing starts at 1 in these arrays
for (int p = 0; p <= numParams; ++p)
- lambda.add(new Double(0));
- bestLambda.add(new Double(0));
+ lambda.add(0d);
+ bestLambda.add(0d);
// why only lambda is a list? because the size of lambda
// may increase over time, but other arrays are specified in
// the param config file, only used for initialization
@@ -348,6 +345,7 @@ public class PROCore {
String[][] refSentences = new String[numSentences][refsPerSen];
+ String decoderCommand;
try {
// read in reference sentences
@@ -482,10 +480,10 @@ public class PROCore {
@SuppressWarnings("unchecked")
TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
- indicesOfInterest_all = temp_TSA;
+ TreeSet<Integer>[] indicesOfInterest_all = temp_TSA;
for (int i = 0; i < numSentences; ++i) {
- indicesOfInterest_all[i] = new TreeSet<Integer>();
+ indicesOfInterest_all[i] = new TreeSet<>();
}
} // void initialize(...)
@@ -510,9 +508,9 @@ public class PROCore {
if (folder.exists()) {
File[] listOfFiles = folder.listFiles();
- for (int i = 0; i < listOfFiles.length; i++) {
- if (listOfFiles[i].isFile()) {
- files = listOfFiles[i].getName();
+ for (File listOfFile : listOfFiles) {
+ if (listOfFile.isFile()) {
+ files = listOfFile.getName();
if (files.startsWith("PRO.temp")) {
deleteFile(files);
}
@@ -617,11 +615,11 @@ public class PROCore {
// save feats and stats for all candidates(old & new)
HashMap<String, String>[] feat_hash = new HashMap[numSentences];
for (int i = 0; i < numSentences; i++)
- feat_hash[i] = new HashMap<String, String>();
+ feat_hash[i] = new HashMap<>();
HashMap<String, String>[] stats_hash = new HashMap[numSentences];
for (int i = 0; i < numSentences; i++)
- stats_hash[i] = new HashMap<String, String>();
+ stats_hash[i] = new HashMap<>();
while (!done) { // NOTE: this "loop" will only be carried out once
println("--- Starting PRO iteration #" + iteration + " @ " + (new Date()) + " ---", 1);
@@ -838,7 +836,7 @@ public class PROCore {
// (It's not actually a bug, but only because existingCandStats gets
// cleared before moving to the next source sentence.)
// FIX: should be made an array, indexed by i
- HashMap<String, String> existingCandStats = new HashMap<String, String>();
+ HashMap<String, String> existingCandStats = new HashMap<>();
// VERY IMPORTANT:
// A CANDIDATE X MAY HAVE APPEARED IN ITER 1, ITER 3
// BUT IF THE USER SPECIFIED TO CONSIDER ITERATIONS FROM ONLY ITER 2, THEN
@@ -910,7 +908,7 @@ public class PROCore {
// need to identify newly fired feats here
if (featId > numParams) {
++numParams;
- lambda.add(new Double(0));
+ lambda.add(0d);
}
}
}
@@ -936,7 +934,7 @@ public class PROCore {
String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest + 1];
- Vector<String> unknownCands_V = new Vector<String>();
+ Vector<String> unknownCands_V = new Vector<>();
// which candidates (of the i'th source sentence) have not been seen before
// this iteration?
@@ -1110,7 +1108,7 @@ public class PROCore {
// need to identify newly fired feats here
if (featId > numParams) {
++numParams;
- lambda.add(new Double(0));
+ lambda.add(0d);
}
}
}
@@ -1223,7 +1221,7 @@ public class PROCore {
lambda.set(p, bestLambda.get(p));
// and set the rest of lambda to be 0
for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
- lambda.set(p + bestLambda.size(), new Double(0));
+ lambda.set(p + bestLambda.size(), 0d);
}
return null; // this means that the old values should be kept by the caller
@@ -1239,7 +1237,7 @@ public class PROCore {
* System.exit(0);
*/
- Vector<String> output = new Vector<String>();
+ Vector<String> output = new Vector<>();
// note: initialLambda[] has length = numParamsOld
// augmented with new feature weights, initial values are 0
@@ -1286,8 +1284,8 @@ public class PROCore {
/************* end optimization **************/
- for (int i = 0; i < output.size(); i++)
- println(output.get(i));
+ for (String anOutput : output)
+ println(anOutput);
// check if any parameter has been updated
boolean anyParamChanged = false;
@@ -1366,7 +1364,7 @@ public class PROCore {
// use the new wt vector to decode the next iteration
// (interpolation with previous wt vector)
for (int i = 1; i <= numParams; i++)
- lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
+ lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i));
println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
println("", 1);
@@ -1400,9 +1398,9 @@ public class PROCore {
retStr += "(listing the first " + featToPrint + " lambdas)";
for (int c = 1; c <= featToPrint - 1; ++c) {
- retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
+ retStr += "" + String.format("%.4f", lambdaA.get(c)) + ", ";
}
- retStr += "" + String.format("%.4f", lambdaA.get(numParams).doubleValue()) + "}";
+ retStr += "" + String.format("%.4f", lambdaA.get(numParams)) + "}";
return retStr;
}
@@ -1435,7 +1433,7 @@ public class PROCore {
println("Running external decoder...", 1);
try {
- ArrayList<String> cmd = new ArrayList<String>();
+ ArrayList<String> cmd = new ArrayList<>();
cmd.add(decoderCommandFileName);
if (passIterationToDecoder)
@@ -1584,7 +1582,7 @@ public class PROCore {
if (c_match == -1) {
outFile.println(line);
} else {
- if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
+ if (Math.abs(params.get(c_match)) > 1e-20)
outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
}
@@ -1593,7 +1591,7 @@ public class PROCore {
// now append weights of new features
for (int c = origFeatNum + 1; c <= numParams; ++c) {
- if (Math.abs(params.get(c).doubleValue()) > 1e-20)
+ if (Math.abs(params.get(c)) > 1e-20)
outFile.println(Vocabulary.word(c) + " " + params.get(c));
}
@@ -1624,16 +1622,20 @@ public class PROCore {
// read default value
lambda.set(c, inFile_init.nextDouble());
- defaultLambda[c] = lambda.get(c).doubleValue();
+ defaultLambda[c] = lambda.get(c);
// read isOptimizable
dummy = inFile_init.next();
- if (dummy.equals("Opt")) {
+ switch (dummy) {
+ case "Opt":
isOptimizable[c] = true;
- } else if (dummy.equals("Fix")) {
+ break;
+ case "Fix":
isOptimizable[c] = false;
- } else {
- throw new RuntimeException("Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
+ break;
+ default:
+ throw new RuntimeException(
+ "Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
}
if (!isOptimizable[c]) { // skip next two values
@@ -1702,9 +1704,11 @@ public class PROCore {
dummy = (origLine.substring(origLine.indexOf("=") + 1)).trim();
String[] dummyA = dummy.split("\\s+");
- if (dummyA[0].equals("none")) {
+ switch (dummyA[0]) {
+ case "none":
normalizationOptions[0] = 0;
- } else if (dummyA[0].equals("absval")) {
+ break;
+ case "absval":
normalizationOptions[0] = 1;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
String pName = dummyA[2];
@@ -1720,36 +1724,43 @@ public class PROCore {
throw new RuntimeException("Unrecognized feature name " + normalizationOptions[2]
+ " for absval normalization method.");
}
- } else if (dummyA[0].equals("maxabsval")) {
+ break;
+ case "maxabsval":
normalizationOptions[0] = 2;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
if (normalizationOptions[1] <= 0) {
- throw new RuntimeException("Value for the maxabsval normalization method must be positive.");
+ throw new RuntimeException(
+ "Value for the maxabsval normalization method must be positive.");
}
- } else if (dummyA[0].equals("minabsval")) {
+ break;
+ case "minabsval":
normalizationOptions[0] = 3;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
if (normalizationOptions[1] <= 0) {
- throw new RuntimeException("Value for the minabsval normalization method must be positive.");
+ throw new RuntimeException(
+ "Value for the minabsval normalization method must be positive.");
}
- } else if (dummyA[0].equals("LNorm")) {
+ break;
+ case "LNorm":
normalizationOptions[0] = 4;
normalizationOptions[1] = Double.parseDouble(dummyA[1]);
normalizationOptions[2] = Double.parseDouble(dummyA[2]);
if (normalizationOptions[1] <= 0 || normalizationOptions[2] <= 0) {
- throw new RuntimeException("Both values for the LNorm normalization method must be positive.");
+ throw new RuntimeException(
+ "Both values for the LNorm normalization method must be positive.");
}
- } else {
+ break;
+ default:
throw new RuntimeException("Unrecognized normalization method " + dummyA[0] + "; "
+ "must be one of none, absval, maxabsval, and LNorm.");
- } // if (dummyA[0])
+ }
inFile_init.close();
} // processParamFile()
private void processDocInfo() {
// sets numDocuments and docOfSentence[]
- docOfSentence = new int[numSentences];
+ int[] docOfSentence = new int[numSentences];
if (docInfoFileName == null) {
for (int i = 0; i < numSentences; ++i)
@@ -1806,7 +1817,7 @@ public class PROCore {
boolean format3 = false;
- HashSet<String> seenStrings = new HashSet<String>();
+ HashSet<String> seenStrings = new HashSet<>();
BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
for (int i = 0; i < numSentences; ++i) {
// set format3 = true if a duplicate is found
@@ -1818,8 +1829,8 @@ public class PROCore {
inFile.close();
- HashSet<String> seenDocNames = new HashSet<String>();
- HashMap<String, Integer> docOrder = new HashMap<String, Integer>();
+ HashSet<String> seenDocNames = new HashSet<>();
+ HashMap<String, Integer> docOrder = new HashMap<>();
// maps a document name to the order (0-indexed) in which it was seen
inFile = new BufferedReader(new FileReader(docInfoFileName));
@@ -1946,7 +1957,7 @@ public class PROCore {
try {
PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
for (int c = 1; c <= numParams; ++c) {
- outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
+ outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c));
}
outFile_lambdas.close();
@@ -1960,9 +1971,9 @@ public class PROCore {
private String[] cfgFileToArgsArray(String fileName) {
checkFile(fileName);
- Vector<String> argsVector = new Vector<String>();
+ Vector<String> argsVector = new Vector<>();
- try (BufferedReader inFile = new BufferedReader(new FileReader(fileName));) {
+ try (BufferedReader inFile = new BufferedReader(new FileReader(fileName))) {
String line, origLine;
do {
line = inFile.readLine();
@@ -1970,7 +1981,7 @@ public class PROCore {
if (line != null && line.length() > 0 && line.charAt(0) != '#') {
- if (line.indexOf("#") != -1) { // discard comment
+ if (line.contains("#")) { // discard comment
line = line.substring(0, line.indexOf("#"));
}
@@ -1993,7 +2004,7 @@ public class PROCore {
// CMU MODIFICATION(FROM METEOR FOR ZMERT)
// Parse args
- ArrayList<String> argList = new ArrayList<String>();
+ ArrayList<String> argList = new ArrayList<>();
StringBuilder arg = new StringBuilder();
boolean quoted = false;
for (int i = 0; i < line.length(); i++) {
@@ -2026,9 +2037,7 @@ public class PROCore {
argsVector.add(paramA[1]);
} else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
// -m (metricName), -docSet are allowed to have extra options
- for (int opt = 0; opt < paramA.length; ++opt) {
- argsVector.add(paramA[opt]);
- }
+ Collections.addAll(argsVector, paramA);
} else {
throw new RuntimeException("Malformed line in config file:" + origLine);
}
@@ -2058,8 +2067,8 @@ public class PROCore {
private void processArgsArray(String[] args, boolean firstTime) {
/* set default values */
// Relevant files
- dirPrefix = null;
- sourceFileName = null;
+ String dirPrefix = null;
+ String sourceFileName = null;
refFileName = "reference.txt";
refsPerSen = 1;
textNormMethod = 1;
@@ -2082,7 +2091,7 @@ public class PROCore {
//
// /* possibly other early stopping criteria here */
//
- numOptThreads = 1;
+ int numOptThreads = 1;
saveInterFiles = 3;
compressFiles = 0;
oneModificationPerIteration = false;
@@ -2108,30 +2117,39 @@ public class PROCore {
while (i < args.length) {
String option = args[i];
// Relevant files
- if (option.equals("-dir")) {
+ switch (option) {
+ case "-dir":
dirPrefix = args[i + 1];
- } else if (option.equals("-s")) {
+ break;
+ case "-s":
sourceFileName = args[i + 1];
- } else if (option.equals("-r")) {
+ break;
+ case "-r":
refFileName = args[i + 1];
- } else if (option.equals("-rps")) {
+ break;
+ case "-rps":
refsPerSen = Integer.parseInt(args[i + 1]);
if (refsPerSen < 1) {
throw new RuntimeException("refsPerSen must be positive.");
}
- } else if (option.equals("-txtNrm")) {
+ break;
+ case "-txtNrm":
textNormMethod = Integer.parseInt(args[i + 1]);
if (textNormMethod < 0 || textNormMethod > 4) {
throw new RuntimeException("textNormMethod should be between 0 and 4");
}
- } else if (option.equals("-p")) {
+ break;
+ case "-p":
paramsFileName = args[i + 1];
- } else if (option.equals("-docInfo")) {
+ break;
+ case "-docInfo":
docInfoFileName = args[i + 1];
- } else if (option.equals("-fin")) {
+ break;
+ case "-fin":
finalLambdaFileName = args[i + 1];
// MERT specs
- } else if (option.equals("-m")) {
+ break;
+ case "-m":
metricName = args[i + 1];
metricName_display = metricName;
if (EvaluationMetric.knownMetricName(metricName)) {
@@ -2144,7 +2162,8 @@ public class PROCore {
} else {
throw new RuntimeException("Unknown metric name " + metricName + ".");
}
- } else if (option.equals("-docSet")) {
+ break;
+ case "-docSet":
String method = args[i + 1];
if (method.equals("all")) {
@@ -2189,48 +2208,56 @@ public class PROCore {
} else {
throw new RuntimeException("Unknown docSet method " + method + ".");
}
- } else if (option.equals("-maxIt")) {
+ break;
+ case "-maxIt":
maxMERTIterations = Integer.parseInt(args[i + 1]);
if (maxMERTIterations < 1) {
throw new RuntimeException("maxMERTIts must be positive.");
}
- } else if (option.equals("-minIt")) {
+ break;
+ case "-minIt":
minMERTIterations = Integer.parseInt(args[i + 1]);
if (minMERTIterations < 1) {
throw new RuntimeException("minMERTIts must be positive.");
}
- } else if (option.equals("-prevIt")) {
+ break;
+ case "-prevIt":
prevMERTIterations = Integer.parseInt(args[i + 1]);
if (prevMERTIterations < 0) {
throw new RuntimeException("prevMERTIts must be non-negative.");
}
- } else if (option.equals("-stopIt")) {
+ break;
+ case "-stopIt":
stopMinIts = Integer.parseInt(args[i + 1]);
if (stopMinIts < 1) {
throw new RuntimeException("stopMinIts must be positive.");
}
- } else if (option.equals("-stopSig")) {
+ break;
+ case "-stopSig":
stopSigValue = Double.parseDouble(args[i + 1]);
- }
+ break;
//
// /* possibly other early stopping criteria here */
//
- else if (option.equals("-thrCnt")) {
+ case "-thrCnt":
numOptThreads = Integer.parseInt(args[i + 1]);
if (numOptThreads < 1) {
throw new RuntimeException("threadCount must be positive.");
}
- } else if (option.equals("-save")) {
+ break;
+ case "-save":
saveInterFiles = Integer.parseInt(args[i + 1]);
if (saveInterFiles < 0 || saveInterFiles > 3) {
throw new RuntimeException("save should be between 0 and 3");
}
- } else if (option.equals("-compress")) {
+ break;
+ case "-compress":
compressFiles = Integer.parseInt(args[i + 1]);
if (compressFiles < 0 || compressFiles > 1) {
throw new RuntimeException("compressFiles should be either 0 or 1");
}
- } else if (option.equals("-opi")) {
+ break;
+ case "-opi":
int opi = Integer.parseInt(args[i + 1]);
if (opi == 1) {
oneModificationPerIteration = true;
@@ -2239,7 +2266,8 @@ public class PROCore {
} else {
throw new RuntimeException("oncePerIt must be either 0 or 1.");
}
- } else if (option.equals("-rand")) {
+ break;
+ case "-rand":
int rand = Integer.parseInt(args[i + 1]);
if (rand == 1) {
randInit = true;
@@ -2248,13 +2276,14 @@ public class PROCore {
} else {
throw new RuntimeException("randInit must be either 0 or 1.");
}
- } else if (option.equals("-seed")) {
+ break;
+ case "-seed":
if (args[i + 1].equals("time")) {
seed = System.currentTimeMillis();
} else {
seed = Long.parseLong(args[i + 1]);
}
- }
+ break;
/*
* else if (option.equals("-ud")) { useDisk = Integer.parseInt(args[i+1]); if (useDisk < 0 ||
* useDisk > 2) { println("useDisk should be between 0 and 2"); System.exit(10); } }
@@ -2262,23 +2291,23 @@ public class PROCore {
// for pro:
// classification algorithm class path
- else if (option.equals("-classifierClass")) {
+ case "-classifierClass":
classifierAlg = args[i + 1];
- }
+ break;
// params for the specified classifier
- else if (option.equals("-classifierParams")) {
+ case "-classifierParams":
classifierParams = args[i + 1].split("\\s+");
- }
+ break;
// tau: num of randomly generated candidates
- else if (option.equals("-Tau")) {
+ case "-Tau":
Tau = Integer.parseInt(args[i + 1]);
- }
+ break;
// xi: top-xi candidates to be accepted
- else if (option.equals("-Xi")) {
+ case "-Xi":
Xi = Integer.parseInt(args[i + 1]);
- }
+ break;
// return the best weight during tuning or not
- else if (option.equals("-returnBest")) {
+ case "-returnBest":
int retBest = Integer.parseInt(args[i + 1]);
if (retBest == 1)
returnBest = true;
@@ -2287,57 +2316,66 @@ public class PROCore {
else {
throw new RuntimeException("-returnBest must be either 0 or 1.");
}
- }
+ break;
// interpolation coefficient between current & previous weights
- else if (option.equals("-interCoef")) {
+ case "-interCoef":
interCoef = Double.parseDouble(args[i + 1]);
- }
+ break;
// metric(eg. bleu) diff threshold(to select sampled candidates)
- else if (option.equals("-metricDiff")) {
+ case "-metricDiff":
metricDiff = Double.parseDouble(args[i + 1]);
- }
+ break;
// Decoder specs
- else if (option.equals("-cmd")) {
+ case "-cmd":
decoderCommandFileName = args[i + 1];
- } else if (option.equals("-passIt")) {
+ break;
+ case "-passIt":
int val = Integer.parseInt(args[i + 1]);
if (val < 0 || val > 1) {
throw new RuntimeException("passIterationToDecoder should be either 0 or 1");
}
- passIterationToDecoder = (val == 1) ? true : false;
- } else if (option.equals("-decOut")) {
+ passIterationToDecoder = (val == 1);
+ break;
+ case "-decOut":
decoderOutFileName = args[i + 1];
- } else if (option.equals("-decExit")) {
+ break;
+ case "-decExit":
validDecoderExitValue = Integer.parseInt(args[i + 1]);
- } else if (option.equals("-dcfg")) {
+ break;
+ case "-dcfg":
decoderConfigFileName = args[i + 1];
- } else if (option.equals("-N")) {
+ break;
+ case "-N":
sizeOfNBest = Integer.parseInt(args[i + 1]);
if (sizeOfNBest < 1) {
throw new RuntimeException("N must be positive.");
}
- }
+ break;
// Output specs
- else if (option.equals("-v")) {
+ case "-v":
verbosity = Integer.parseInt(args[i + 1]);
if (verbosity < 0 || verbosity > 4) {
throw new RuntimeException("verbosity should be between 0 and 4");
}
- } else if (option.equals("-decV")) {
+ break;
+ case "-decV":
decVerbosity = Integer.parseInt(args[i + 1]);
if (decVerbosity < 0 || decVerbosity > 1) {
throw new RuntimeException("decVerbosity should be either 0 or 1");
}
- } else if (option.equals("-fake")) {
+ break;
+ case "-fake":
fakeFileNameTemplate = args[i + 1];
int QM_i = fakeFileNameTemplate.indexOf("?");
if (QM_i <= 0) {
- throw new RuntimeException("fakeFileNameTemplate must contain '?' to indicate position of iteration number");
+ throw new RuntimeException(
+ "fakeFileNameTemplate must contain '?' to indicate position of iteration number");
}
fakeFileNamePrefix = fakeFileNameTemplate.substring(0, QM_i);
fakeFileNameSuffix = fakeFileNameTemplate.substring(QM_i + 1);
- } else {
+ break;
+ default:
throw new RuntimeException("Unknown option " + option);
}
@@ -2615,7 +2653,7 @@ public class PROCore {
outFileName = prefix + ".all";
}
- try (PrintWriter outFile = new PrintWriter(outFileName);) {
+ try (PrintWriter outFile = new PrintWriter(outFileName)) {
BufferedReader[] inFile = new BufferedReader[numFiles];
int nextIndex;
@@ -2693,7 +2731,7 @@ public class PROCore {
str = " " + str + " ";
str = str.replaceAll("\\s+", " ");
- TreeSet<Integer> splitIndices = new TreeSet<Integer>();
+ TreeSet<Integer> splitIndices = new TreeSet<>();
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
@@ -2740,7 +2778,7 @@ public class PROCore {
// remove spaces around dashes
if (normMethod == 2 || normMethod == 4) {
- TreeSet<Integer> skipIndices = new TreeSet<Integer>();
+ TreeSet<Integer> skipIndices = new TreeSet<>();
str = " " + str + " ";
for (int i = 0; i < str.length(); ++i) {
@@ -2811,7 +2849,7 @@ public class PROCore {
}
private ArrayList<Double> randomLambda() {
- ArrayList<Double> retLambda = new ArrayList<Double>(1 + numParams);
+ ArrayList<Double> retLambda = new ArrayList<>(1 + numParams);
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
[2/7] incubator-joshua git commit: static analysis based code
improvements on metrics package
Posted by to...@apache.org.
static analysis based code improvements on metrics package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/23ebc372
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/23ebc372
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/23ebc372
Branch: refs/heads/master
Commit: 23ebc3721f6b17c2ea3eb40202fa33646590caf6
Parents: 2d3911c
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 13:49:40 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 13:49:40 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/joshua/metrics/BLEU.java | 27 ++++----
.../java/org/apache/joshua/metrics/CHRF.java | 37 +++++-----
.../apache/joshua/metrics/EvaluationMetric.java | 72 ++++++++++++--------
.../apache/joshua/metrics/GradeLevelBLEU.java | 24 +++----
.../java/org/apache/joshua/metrics/METEOR.java | 40 ++++++-----
.../joshua/metrics/MinimumChangeBLEU.java | 8 +--
.../java/org/apache/joshua/metrics/Precis.java | 8 +--
.../joshua/metrics/PrecisMinusSourceBLEU.java | 24 +++----
.../java/org/apache/joshua/metrics/SARI.java | 26 ++++---
.../org/apache/joshua/metrics/SourceBLEU.java | 4 +-
.../java/org/apache/joshua/metrics/TER.java | 32 +++++----
.../org/apache/joshua/metrics/TERMinusBLEU.java | 24 +++----
.../org/apache/joshua/metrics/TercomRunner.java | 10 +--
13 files changed, 170 insertions(+), 166 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/BLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/BLEU.java b/src/main/java/org/apache/joshua/metrics/BLEU.java
index f1b50e7..f80faa1 100644
--- a/src/main/java/org/apache/joshua/metrics/BLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/BLEU.java
@@ -55,13 +55,16 @@ public class BLEU extends EvaluationMetric {
throw new RuntimeException("Maximum gram length must be positive");
}
- if (methodStr.equals("closest")) {
+ switch (methodStr) {
+ case "closest":
setEffLengthMethod(EffectiveLengthMethod.CLOSEST);
- } else if (methodStr.equals("shortest")) {
+ break;
+ case "shortest":
setEffLengthMethod(EffectiveLengthMethod.SHORTEST);
// } else if (methodStr.equals("average")) {
// effLengthMethod = EffectiveLengthMethod.AVERAGE;
- } else {
+ break;
+ default:
LOG.error("Unknown effective length method string {}", methodStr);
// System.out.println("Should be one of closest, shortest, or average.");
LOG.error("Should be one of closest or shortest.");
@@ -109,8 +112,8 @@ public class BLEU extends EvaluationMetric {
HashMap<String, Integer>[] temp_HMA = new HashMap[numSentences];
maxNgramCounts = temp_HMA;
- String gram = "";
- int oldCount = 0, nextCount = 0;
+ String gram;
+ int oldCount, nextCount;
for (int i = 0; i < numSentences; ++i) {
maxNgramCounts[i] = getNgramCountsAll(refSentences[i][0]);
@@ -188,14 +191,12 @@ public class BLEU extends EvaluationMetric {
for (int n = 1; n <= getMaxGramLength(); ++n) {
int correctGramCount = 0;
- String gram = "";
- int candGramCount = 0, maxRefGramCount = 0, clippedCount = 0;
-
- Iterator<String> it = (candCountsArray[n].keySet()).iterator();
+ String gram;
+ int candGramCount, maxRefGramCount, clippedCount;
- while (it.hasNext()) {
+ for (String s : (candCountsArray[n].keySet())) {
// for each n-gram type in the candidate
- gram = it.next();
+ gram = s;
candGramCount = candCountsArray[n].get(gram);
// if (maxNgramCounts[i][n].containsKey(gram)) {
// maxRefGramCount = maxNgramCounts[i][n].get(gram);
@@ -411,7 +412,7 @@ public class BLEU extends EvaluationMetric {
HashMap<String, Integer>[] ngramCountsArray = new HashMap[1 + getMaxGramLength()];
ngramCountsArray[0] = null;
for (int n = 1; n <= getMaxGramLength(); ++n) {
- ngramCountsArray[n] = new HashMap<String, Integer>();
+ ngramCountsArray[n] = new HashMap<>();
}
int len = words.length;
@@ -481,7 +482,7 @@ public class BLEU extends EvaluationMetric {
}
public HashMap<String, Integer> getNgramCountsAll(String[] words) {
- HashMap<String, Integer> ngramCountsAll = new HashMap<String, Integer>();
+ HashMap<String, Integer> ngramCountsAll = new HashMap<>();
int len = words.length;
String gram;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/CHRF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/CHRF.java b/src/main/java/org/apache/joshua/metrics/CHRF.java
index dcf606a..c1a0cff 100644
--- a/src/main/java/org/apache/joshua/metrics/CHRF.java
+++ b/src/main/java/org/apache/joshua/metrics/CHRF.java
@@ -131,12 +131,12 @@ public class CHRF extends EvaluationMetric {
HashMap<String, Integer>[] grams = new HashMap[1 + maxGramLength];
grams[0] = null;
for (int n = 1; n <= maxGramLength; ++n) {
- grams[n] = new HashMap<String, Integer>();
+ grams[n] = new HashMap<>();
}
for (int n=1; n<=maxGramLength; n++){
- String gram = "";
+ String gram;
for (int i = 0; i < s.length() - n + 1; i++){
gram = s.substring(i, i+n);
if(grams[n].containsKey(gram)){
@@ -169,26 +169,23 @@ public class CHRF extends EvaluationMetric {
int[] to_return = {0,0};
String gram;
int cand_grams = 0;
- int candGramCount = 0, refGramCount = 0;
+ int candGramCount, refGramCount;
int errors = 0;
- Iterator<String> it = (cand.keySet()).iterator();
-
- while (it.hasNext()) {
- gram = it.next();
- candGramCount = cand.get(gram);
- cand_grams += candGramCount;
- if (ref.containsKey(gram)) {
- refGramCount = ref.get(gram);
- if(candGramCount>refGramCount){
- int error_here = candGramCount - refGramCount;
- errors += error_here;
- }
- } else {
- refGramCount = 0;
- errors += candGramCount;
- }
+ for (String s : (cand.keySet())) {
+ gram = s;
+ candGramCount = cand.get(gram);
+ cand_grams += candGramCount;
+ if (ref.containsKey(gram)) {
+ refGramCount = ref.get(gram);
+ if (candGramCount > refGramCount) {
+ int error_here = candGramCount - refGramCount;
+ errors += error_here;
}
+ } else {
+ errors += candGramCount;
+ }
+ }
//System.out.println(" Ngrams not found: " + not_found);
@@ -236,7 +233,7 @@ public class CHRF extends EvaluationMetric {
double[] recalls = new double[maxGramLength+1];
double[] fs = new double[maxGramLength+1];
//double[] scs = new double[maxGramLength+1];
- double totalF = 0, totalSC = 0;
+ double totalF = 0, totalSC;
double lp = 1;
if (stats.length != suffStatsCount) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java b/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
index 9ac77f1..3067d10 100644
--- a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
+++ b/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
@@ -53,7 +53,7 @@ public abstract class EvaluationMetric {
/* static (=> also non-abstract) methods */
public static void set_knownMetrics() {
- metricOptionCount = new TreeMap<String, Integer>();
+ metricOptionCount = new TreeMap<>();
metricOptionCount.put("BLEU", 2);
// the "BLEU" metric expects an options array of length 2
@@ -83,44 +83,64 @@ public abstract class EvaluationMetric {
public static EvaluationMetric getMetric(String metricName, String[] metricOptions) {
EvaluationMetric retMetric = null;
- if (metricName.equals("BLEU")) {
+ switch (metricName) {
+ case "BLEU":
retMetric = new BLEU(metricOptions); // the "BLEU" metric corresponds to the BLEU class
- } else if (metricName.equals("BLEU_SBP")) {
+
+ break;
+ case "BLEU_SBP":
retMetric = new BLEU_SBP(metricOptions); // the "BLEU_SBP" metric corresponds to the BLEU_SBP
- // class
- } else if (metricName.equals("01LOSS")) {
+
+ // class
+ break;
+ case "01LOSS":
retMetric = new ZeroOneLoss(metricOptions); // the "01LOSS" metric corresponds to the
- // ZeroOneLoss class
- } else if (metricName.equals("TER")) {
+
+ // ZeroOneLoss class
+ break;
+ case "TER":
retMetric = new TER(metricOptions); // the "TER" metric corresponds to the TER class
+
// } else if (metricName.equals("METEOR")) {
// retMetric = new METEOR(metricOptions); // the "METEOR" metric corresponds to the METEOR
// class
// } else if (metricName.equals("RYPT")) {
// retMetric = new RYPT(metricOptions); // the "RYPT" metric corresponds to the RYPT class
- } else if (metricName.equals("TER-BLEU")) {
+ break;
+ case "TER-BLEU":
retMetric = new TERMinusBLEU(metricOptions); // the "TER-BLEU" metric corresponds to the
- // TERMinusBLEU class
+
+ // TERMinusBLEU class
// } else if (metricName.equals("WER")) {
// retMetric = new WordErrorRate(metricOptions); // the "WER" metric corresponds to the
// WordErrorRate class
- } else if (metricName.equals("MC_BLEU")) {
+ break;
+ case "MC_BLEU":
retMetric = new MinimumChangeBLEU(metricOptions); // the "MC_BLEU" metric corresponds to the
- // ParaphraseBLEU class
- } else if (metricName.equals("PRECIS")) {
+
+ // ParaphraseBLEU class
+ break;
+ case "PRECIS":
retMetric = new Precis(metricOptions);
- } else if (metricName.equals("SRC_BLEU")) {
+ break;
+ case "SRC_BLEU":
retMetric = new SourceBLEU(metricOptions);
- } else if (metricName.equals("PRECIS-SRC_BLEU")) {
+ break;
+ case "PRECIS-SRC_BLEU":
retMetric = new PrecisMinusSourceBLEU(metricOptions);
- } else if (metricName.equals("GL_BLEU")) {
+ break;
+ case "GL_BLEU":
retMetric = new GradeLevelBLEU(metricOptions); // the "GL_BLEU" metric corresponds to the
- // GradeLevelBLEU class
- } else if (metricName.equals("SARI")) {
+
+ // GradeLevelBLEU class
+ break;
+ case "SARI":
retMetric = new SARI(metricOptions);
-
- } else if (metricName.equals("CHRF")) {
- retMetric = new CHRF(metricOptions);
+
+ break;
+ case "CHRF":
+ retMetric = new CHRF(metricOptions);
+ break;
}
return retMetric;
@@ -145,9 +165,7 @@ public abstract class EvaluationMetric {
public static void set_refSentences(String[][] refs) {
refSentences = new String[numSentences][refsPerSen];
for (int i = 0; i < numSentences; ++i) {
- for (int r = 0; r < refsPerSen; ++r) {
- refSentences[i][r] = refs[i][r];
- }
+ System.arraycopy(refs[i], 0, refSentences[i], 0, refsPerSen);
}
}
@@ -190,9 +208,7 @@ public abstract class EvaluationMetric {
int[][] SS = suffStats(SA, IA);
int[] stats = new int[suffStatsCount];
- for (int s = 0; s < suffStatsCount; ++s) {
- stats[s] = SS[0][s];
- }
+ System.arraycopy(SS[0], 0, stats, 0, suffStatsCount);
return score(stats);
}
@@ -241,9 +257,7 @@ public abstract class EvaluationMetric {
for (int d = 0; d < candCount; ++d) {
int[] currStats = suffStats(cand_strings[d], cand_indices[d]);
- for (int s = 0; s < suffStatsCount; ++s) {
- stats[d][s] = currStats[s];
- }
+ System.arraycopy(currStats, 0, stats[d], 0, suffStatsCount);
} // for (d)
return stats;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/GradeLevelBLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/GradeLevelBLEU.java b/src/main/java/org/apache/joshua/metrics/GradeLevelBLEU.java
index e143572..8bda9b8 100644
--- a/src/main/java/org/apache/joshua/metrics/GradeLevelBLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/GradeLevelBLEU.java
@@ -36,13 +36,14 @@ public class GradeLevelBLEU extends BLEU {
private static final Pattern syllable = Pattern.compile("([^aeiouy]*[aeiouy]+)");
private static final Pattern silentE = Pattern.compile("[^aeiou]e$");
private static final int SOURCE = 0, CANDIDATE = 1, REFERENCE = 2;
- private int srcIndex = 1, sentCountIndex;
+ private final int srcIndex = 1;
+ private int sentCountIndex;
private SourceBLEU srcBLEU;
private double targetGL = 9.87; // tune.simp avg GL = 9.8704 (tune.en =
// 14.0785
private double alpha = 0.9;
private boolean useTarget = true;
- private boolean useBLEUplus = true;
+ private final boolean useBLEUplus = true;
public GradeLevelBLEU() {
super();
@@ -76,9 +77,7 @@ public class GradeLevelBLEU extends BLEU {
String line;
int i = 0;
while (i < numSentences && (line = br.readLine()) != null) {
- for (int r = 0; r < refsPerSen; ++r) {
- newRefSentences[i][r] = refSentences[i][r];
- }
+ System.arraycopy(refSentences[i], 0, newRefSentences[i], 0, refsPerSen);
newRefSentences[i][refsPerSen] = line.trim();
i++;
}
@@ -98,7 +97,7 @@ public class GradeLevelBLEU extends BLEU {
public int[] suffStats(String cand_str, int i) {
int[] stats = new int[suffStatsCount];
- String[] candidate_tokens = null;
+ String[] candidate_tokens;
if (!cand_str.equals("")) {
candidate_tokens = cand_str.split("\\s+");
@@ -113,9 +112,8 @@ public class GradeLevelBLEU extends BLEU {
// set source BLEU stats
if (useBLEUplus) {
int[] src_prec_suffStats = srcBLEU.suffStats(cand_str, i);
- for (int j = 0; j < src_prec_suffStats.length; j++) {
- stats[2 * getMaxGramLength() + j] = src_prec_suffStats[j];
- }
+ System.arraycopy(src_prec_suffStats, 0, stats, 2 * getMaxGramLength() + 0,
+ src_prec_suffStats.length);
}
// now set the readability stats
@@ -194,7 +192,7 @@ public class GradeLevelBLEU extends BLEU {
double candGL =
gradeLevel(stats[tokenLength(CANDIDATE)], stats[syllableLength(CANDIDATE)],
stats[sentCountIndex]);
- double readabilityPenalty = 1;
+ double readabilityPenalty;
if (useTarget) {
readabilityPenalty = getReadabilityPenalty(candGL, targetGL);
@@ -207,9 +205,7 @@ public class GradeLevelBLEU extends BLEU {
if (useBLEUplus) {
int[] srcStats = new int[2 * getMaxGramLength()];
- for (int i = 0; i < 2 * getMaxGramLength(); i++) {
- srcStats[i] = stats[2 * getMaxGramLength() + i];
- }
+ System.arraycopy(stats, 2 * getMaxGramLength(), srcStats, 0, 2 * getMaxGramLength());
srcStats[2 * getMaxGramLength()] = stats[tokenLength(CANDIDATE)];
srcStats[2 * getMaxGramLength()] = stats[tokenLength(SOURCE)];
double srcBLEUscore = srcBLEU.score(stats);
@@ -246,7 +242,7 @@ public class GradeLevelBLEU extends BLEU {
double ref_gl =
gradeLevel(stats[tokenLength(REFERENCE)], stats[syllableLength(REFERENCE)],
stats[sentCountIndex]);
- double penalty = 1;
+ double penalty;
double bleu_ref = super.score(stats);
double bleu_src = srcBLEU.score(stats);
double bleu_plus = BLEU_plus(bleu_ref, bleu_src);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/METEOR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/METEOR.java b/src/main/java/org/apache/joshua/metrics/METEOR.java
index 33232db..370e707 100644
--- a/src/main/java/org/apache/joshua/metrics/METEOR.java
+++ b/src/main/java/org/apache/joshua/metrics/METEOR.java
@@ -31,7 +31,6 @@ public class METEOR extends EvaluationMetric {
protected String targetLanguage;
protected boolean normalize;
protected boolean keepPunctuation;
- private int maxComputations;
public METEOR(String[] Metric_options) {
// M_o[0]: -l language, one of {en,cz,fr,de,es}
@@ -41,27 +40,36 @@ public class METEOR extends EvaluationMetric {
// default in meteor v0.8: en, norm_no, removePunc
- if (Metric_options[0].equals("en")) {
+ switch (Metric_options[0]) {
+ case "en":
targetLanguage = "en";
- } else if (Metric_options[0].equals("cz")) {
+ break;
+ case "cz":
targetLanguage = "cz";
- } else if (Metric_options[0].equals("fr")) {
+ break;
+ case "fr":
targetLanguage = "fr";
- } else if (Metric_options[0].equals("de")) {
+ break;
+ case "de":
targetLanguage = "de";
- } else if (Metric_options[0].equals("es")) {
+ break;
+ case "es":
targetLanguage = "es";
- } else {
- String msg = "Unknown language string " + Metric_options[0]
- + ". Should be one of {en,cz,fr,de,es}.";
+ break;
+ default:
+ String msg =
+ "Unknown language string " + Metric_options[0] + ". Should be one of {en,cz,fr,de,es}.";
throw new RuntimeException(msg);
}
- if (Metric_options[1].equals("norm_yes")) {
+ switch (Metric_options[1]) {
+ case "norm_yes":
normalize = true;
- } else if (Metric_options[1].equals("norm_no")) {
+ break;
+ case "norm_no":
normalize = false;
- } else {
+ break;
+ default:
String msg = "Unknown normalize string " + Metric_options[1]
+ ". Should be one of norm_yes or norm_no.";
throw new RuntimeException(msg);
@@ -77,7 +85,7 @@ public class METEOR extends EvaluationMetric {
throw new RuntimeException(msg);
}
- maxComputations = Integer.parseInt(Metric_options[3]);
+ int maxComputations = Integer.parseInt(Metric_options[3]);
if (maxComputations < 1) {
throw new RuntimeException("Maximum computations must be positive");
}
@@ -127,8 +135,8 @@ public class METEOR extends EvaluationMetric {
OutputStreamWriter outStreamWriter = new OutputStreamWriter(outStream, "utf8");
BufferedWriter outFile = new BufferedWriter(outStreamWriter);
- for (int d = 0; d < candCount; ++d) {
- writeLine(cand_strings[d], outFile);
+ for (String cand_string : cand_strings) {
+ writeLine(cand_string, outFile);
}
outFile.close();
@@ -175,7 +183,7 @@ public class METEOR extends EvaluationMetric {
// 3) Read SS from output file produced by meteor
BufferedReader inFile = new BufferedReader(new FileReader("TER_out.ter"));
- String line = "";
+ String line;
line = inFile.readLine(); // skip hyp line
line = inFile.readLine(); // skip ref line
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/MinimumChangeBLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/MinimumChangeBLEU.java b/src/main/java/org/apache/joshua/metrics/MinimumChangeBLEU.java
index 6d19ba5..16444a7 100644
--- a/src/main/java/org/apache/joshua/metrics/MinimumChangeBLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/MinimumChangeBLEU.java
@@ -31,8 +31,8 @@ public class MinimumChangeBLEU extends BLEU {
// we assume that the source for the paraphrasing run is
// part of the set of references
- private int sourceReferenceIndex;
- private double thresholdWER;
+ private final int sourceReferenceIndex;
+ private final double thresholdWER;
public MinimumChangeBLEU() {
@@ -67,8 +67,8 @@ public class MinimumChangeBLEU extends BLEU {
HashMap<String, Integer>[] temp_HMA = new HashMap[numSentences];
maxNgramCounts = temp_HMA;
- String gram = "";
- int oldCount = 0, nextCount = 0;
+ String gram;
+ int oldCount, nextCount;
for (int i = 0; i < numSentences; ++i) {
// update counts as necessary from the reference translations
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/Precis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/Precis.java b/src/main/java/org/apache/joshua/metrics/Precis.java
index b2d852b..c620d7d 100644
--- a/src/main/java/org/apache/joshua/metrics/Precis.java
+++ b/src/main/java/org/apache/joshua/metrics/Precis.java
@@ -34,12 +34,12 @@ public class Precis extends BLEU {
// We assume that the source for the paraphrasing run is
// part of the set of references, this is its index.
- private int sourceReferenceIndex;
+ private final int sourceReferenceIndex;
// A global target compression rate to achieve
// if negative, we default to locally aiming for the compression
// rate given by the (closest) reference compression?
- private double targetCompressionRate;
+ private final double targetCompressionRate;
// Are we optimizing for character-based compression (as opposed
// to token-based)?
@@ -112,8 +112,8 @@ public class Precis extends BLEU {
HashMap<String, Integer>[] temp_HMA = new HashMap[numSentences];
maxNgramCounts = temp_HMA;
- String gram = "";
- int oldCount = 0, nextCount = 0;
+ String gram;
+ int oldCount, nextCount;
for (int i = 0; i < numSentences; ++i) {
// update counts as necessary from the reference translations
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/PrecisMinusSourceBLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/PrecisMinusSourceBLEU.java b/src/main/java/org/apache/joshua/metrics/PrecisMinusSourceBLEU.java
index bfe15d0..4827fc9 100644
--- a/src/main/java/org/apache/joshua/metrics/PrecisMinusSourceBLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/PrecisMinusSourceBLEU.java
@@ -27,10 +27,10 @@ import java.io.PrintWriter;
public class PrecisMinusSourceBLEU extends EvaluationMetric {
- private Precis myPrecis;
- private SourceBLEU mySourceBLEU;
+ private final Precis myPrecis;
+ private final SourceBLEU mySourceBLEU;
- private double bleuWeight;
+ private final double bleuWeight;
private int precisCount;
private int sourceBleuCount;
@@ -142,16 +142,12 @@ public class PrecisMinusSourceBLEU extends EvaluationMetric {
+ " vs. " + suffStatsCount + ") in PrecisMinusSourceBLEU.score(int[])");
}
- double sc = 0.0;
+ double sc;
int[] stats_Precis = new int[precisCount];
int[] stats_SourceBLEU = new int[sourceBleuCount];
- for (int s = 0; s < precisCount; ++s) {
- stats_Precis[s] = stats[s];
- }
- for (int s = 0; s < sourceBleuCount; ++s) {
- stats_SourceBLEU[s] = stats[s + precisCount];
- }
+ System.arraycopy(stats, 0, stats_Precis, 0, precisCount);
+ System.arraycopy(stats, 0 + precisCount, stats_SourceBLEU, 0, sourceBleuCount);
double sc_T = myPrecis.score(stats_Precis);
double sc_B = mySourceBLEU.score(stats_SourceBLEU);
@@ -164,12 +160,8 @@ public class PrecisMinusSourceBLEU extends EvaluationMetric {
public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
int[] stats_Precis = new int[precisCount];
int[] stats_SourceBLEU = new int[sourceBleuCount];
- for (int s = 0; s < precisCount; ++s) {
- stats_Precis[s] = stats[s];
- }
- for (int s = 0; s < sourceBleuCount; ++s) {
- stats_SourceBLEU[s] = stats[s + precisCount];
- }
+ System.arraycopy(stats, 0, stats_Precis, 0, precisCount);
+ System.arraycopy(stats, 0 + precisCount, stats_SourceBLEU, 0, sourceBleuCount);
System.out.println("---PRECIS---");
myPrecis.printDetailedScore_fromStats(stats_Precis, oneLiner);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/SARI.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/SARI.java b/src/main/java/org/apache/joshua/metrics/SARI.java
index 9ee3af3..990690b 100644
--- a/src/main/java/org/apache/joshua/metrics/SARI.java
+++ b/src/main/java/org/apache/joshua/metrics/SARI.java
@@ -119,8 +119,8 @@ public class SARI extends EvaluationMetric {
HashMap<String, Integer>[][] temp_HMA = new HashMap[numSentences][maxGramLength];
refNgramCounts = temp_HMA;
- String gram = "";
- int oldCount = 0, nextCount = 0;
+ String gram;
+ int oldCount, nextCount;
for (int i = 0; i < numSentences; ++i) {
refNgramCounts[i] = getNgramCountsArray(refSentences[i][0]);
@@ -133,10 +133,8 @@ public class SARI extends EvaluationMetric {
for (int n = 1; n <= maxGramLength; ++n) {
- Iterator<String> it = (nextNgramCounts[n].keySet()).iterator();
-
- while (it.hasNext()) {
- gram = it.next();
+ for (String s : (nextNgramCounts[n].keySet())) {
+ gram = s;
nextCount = nextNgramCounts[n].get(gram);
if (refNgramCounts[i][n].containsKey(gram)) { // update if necessary
@@ -378,7 +376,7 @@ public class SARI extends EvaluationMetric {
public HashMap<String, Integer> substractHashMap(HashMap<String, Integer> counter1,
HashMap<String, Integer> counter2) {
- HashMap<String, Integer> newcounter = new HashMap<String, Integer>();
+ HashMap<String, Integer> newcounter = new HashMap<>();
for (Map.Entry<String, Integer> e : counter1.entrySet()) {
String ngram = e.getKey();
@@ -394,7 +392,7 @@ public class SARI extends EvaluationMetric {
// HashMap result = counter1*ratio1 - counter2*ratio2
public HashMap<String, Integer> substractHashMap(HashMap<String, Integer> counter1,
HashMap<String, Integer> counter2, int ratio1, int ratio2) {
- HashMap<String, Integer> newcounter = new HashMap<String, Integer>();
+ HashMap<String, Integer> newcounter = new HashMap<>();
for (Map.Entry<String, Integer> e : counter1.entrySet()) {
String ngram = e.getKey();
@@ -411,7 +409,7 @@ public class SARI extends EvaluationMetric {
public HashMap<String, Double> divideHashMap(HashMap<String, Integer> counter1,
HashMap<String, Integer> counter2) {
- HashMap<String, Double> newcounter = new HashMap<String, Double>();
+ HashMap<String, Double> newcounter = new HashMap<>();
for (Map.Entry<String, Integer> e : counter1.entrySet()) {
String ngram = e.getKey();
@@ -427,7 +425,7 @@ public class SARI extends EvaluationMetric {
public HashMap<String, Integer> intersectHashMap(HashMap<String, Integer> counter1,
HashMap<String, Integer> counter2) {
- HashMap<String, Integer> newcounter = new HashMap<String, Integer>();
+ HashMap<String, Integer> newcounter = new HashMap<>();
for (Map.Entry<String, Integer> e : counter1.entrySet()) {
String ngram = e.getKey();
@@ -443,7 +441,7 @@ public class SARI extends EvaluationMetric {
// HashMap result = (counter1*ratio1) & (counter2*ratio2)
public HashMap<String, Integer> intersectHashMap(HashMap<String, Integer> counter1,
HashMap<String, Integer> counter2, int ratio1, int ratio2) {
- HashMap<String, Integer> newcounter = new HashMap<String, Integer>();
+ HashMap<String, Integer> newcounter = new HashMap<>();
for (Map.Entry<String, Integer> e : counter1.entrySet()) {
String ngram = e.getKey();
@@ -479,7 +477,7 @@ public class SARI extends EvaluationMetric {
HashMap<String, Integer>[] ngramCountsArray = new HashMap[1 + maxGramLength];
ngramCountsArray[0] = null;
for (int n = 1; n <= maxGramLength; ++n) {
- ngramCountsArray[n] = new HashMap<String, Integer>();
+ ngramCountsArray[n] = new HashMap<>();
}
int len = words.length;
@@ -549,7 +547,7 @@ public class SARI extends EvaluationMetric {
}
public HashMap<String, Integer> getNgramCountsAll(String[] words) {
- HashMap<String, Integer> ngramCountsAll = new HashMap<String, Integer>();
+ HashMap<String, Integer> ngramCountsAll = new HashMap<>();
int len = words.length;
String gram;
@@ -625,6 +623,6 @@ public class SARI extends EvaluationMetric {
private enum StatIndex {
KEEPBOTH, KEEPCAND, KEEPREF, DELBOTH, DELCAND, DELREF, ADDBOTH, ADDCAND, ADDREF, KEEPBOTH2
- };
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/SourceBLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/SourceBLEU.java b/src/main/java/org/apache/joshua/metrics/SourceBLEU.java
index f594954..d9c5b3b 100644
--- a/src/main/java/org/apache/joshua/metrics/SourceBLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/SourceBLEU.java
@@ -23,10 +23,10 @@ import java.util.HashMap;
public class SourceBLEU extends BLEU {
// We assume that the source for the paraphrasing run is
// part of the set of references
- private int sourceReferenceIndex;
+ private final int sourceReferenceIndex;
private int[] sourceWordCount;
- private boolean useBrevityPenalty;
+ private final boolean useBrevityPenalty;
public SourceBLEU() {
super();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/TER.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/TER.java b/src/main/java/org/apache/joshua/metrics/TER.java
index 0dcf9d9..1cd2a00 100644
--- a/src/main/java/org/apache/joshua/metrics/TER.java
+++ b/src/main/java/org/apache/joshua/metrics/TER.java
@@ -52,21 +52,27 @@ public class TER extends EvaluationMetric {
// for 0-3, default values in tercom-0.7.25 are: nocase, punc, 20, 50
- if (Metric_options[0].equals("case")) {
+ switch (Metric_options[0]) {
+ case "case":
caseSensitive = true;
- } else if (Metric_options[0].equals("nocase")) {
+ break;
+ case "nocase":
caseSensitive = false;
- } else {
+ break;
+ default:
String msg = "Unknown case sensitivity string " + Metric_options[0]
+ ". Should be one of case or nocase.";
throw new RuntimeException(msg);
}
- if (Metric_options[1].equals("punc")) {
+ switch (Metric_options[1]) {
+ case "punc":
withPunctuation = true;
- } else if (Metric_options[1].equals("nopunc")) {
+ break;
+ case "nopunc":
withPunctuation = false;
- } else {
+ break;
+ default:
String msg = "Unknown with-punctuation string " + Metric_options[1]
+ ". Should be one of punc or nopunc.";
throw new RuntimeException(msg);
@@ -119,7 +125,7 @@ public class TER extends EvaluationMetric {
}
public double worstPossibleScore() {
- return (+1.0 / 0.0);
+ return Double.POSITIVE_INFINITY;
}
public int[] suffStats(String cand_str, int i) {
@@ -176,7 +182,7 @@ public class TER extends EvaluationMetric {
// 3) Read SS from output file produced by tercom.7.25.jar
BufferedReader inFile = new BufferedReader(new FileReader("TER_out.ter"));
- String line = "";
+ String line;
line = inFile.readLine(); // skip hyp line
line = inFile.readLine(); // skip ref line
@@ -283,7 +289,7 @@ public class TER extends EvaluationMetric {
OutputStreamWriter outStreamWriter = new OutputStreamWriter(outStream, "utf8");
BufferedWriter outFile = new BufferedWriter(outStreamWriter);
- String line_cand = "";
+ String line_cand;
if (numCands > 0) {
for (int d = 0; d < numCands; ++d) {
@@ -326,7 +332,7 @@ public class TER extends EvaluationMetric {
OutputStreamWriter outStreamWriter = new OutputStreamWriter(outStream, "utf8");
BufferedWriter outFile = new BufferedWriter(outStreamWriter);
- String line_index = "";
+ String line_index;
if (numIndices > 0) {
for (int d = 0; d < numIndices; ++d) {
@@ -366,7 +372,7 @@ public class TER extends EvaluationMetric {
}
public int runTercom(String refFileName, String hypFileName, String outFileNamePrefix, int memSize) {
- int exitValue = -1;
+ int exitValue;
try {
@@ -408,7 +414,7 @@ public class TER extends EvaluationMetric {
public void copySS(String inputFileName, PrintWriter outFile) {
try {
BufferedReader inFile = new BufferedReader(new FileReader(inputFileName));
- String line = "";
+ String line;
line = inFile.readLine(); // skip hyp line
line = inFile.readLine(); // skip ref line
@@ -434,7 +440,7 @@ public class TER extends EvaluationMetric {
+ " vs. " + suffStatsCount + ") in TER.score(int[])");
}
- double sc = 0.0;
+ double sc;
sc = stats[0] / (double) stats[1];
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/TERMinusBLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/TERMinusBLEU.java b/src/main/java/org/apache/joshua/metrics/TERMinusBLEU.java
index bd40140..fe38913 100644
--- a/src/main/java/org/apache/joshua/metrics/TERMinusBLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/TERMinusBLEU.java
@@ -27,8 +27,8 @@ import java.io.PrintWriter;
public class TERMinusBLEU extends EvaluationMetric {
// individual components
- private TER myTER;
- private BLEU myBLEU;
+ private final TER myTER;
+ private final BLEU myBLEU;
private int suffStatsCount_TER;
private int suffStatsCount_BLEU;
@@ -63,7 +63,7 @@ public class TERMinusBLEU extends EvaluationMetric {
}
public double worstPossibleScore() {
- return (+1.0 / 0.0);
+ return Double.POSITIVE_INFINITY;
}
public int[] suffStats(String cand_str, int i) {
@@ -154,16 +154,12 @@ public class TERMinusBLEU extends EvaluationMetric {
+ " vs. " + suffStatsCount + ") in TERMinusBLEU.score(int[])");
}
- double sc = 0.0;
+ double sc;
int[] stats_TER = new int[suffStatsCount_TER];
int[] stats_BLEU = new int[suffStatsCount_BLEU];
- for (int s = 0; s < suffStatsCount_TER; ++s) {
- stats_TER[s] = stats[s];
- }
- for (int s = 0; s < suffStatsCount_BLEU; ++s) {
- stats_BLEU[s] = stats[s + suffStatsCount_TER];
- }
+ System.arraycopy(stats, 0, stats_TER, 0, suffStatsCount_TER);
+ System.arraycopy(stats, 0 + suffStatsCount_TER, stats_BLEU, 0, suffStatsCount_BLEU);
double sc_T = myTER.score(stats_TER);
double sc_B = myBLEU.score(stats_BLEU);
@@ -176,12 +172,8 @@ public class TERMinusBLEU extends EvaluationMetric {
public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
int[] stats_TER = new int[suffStatsCount_TER];
int[] stats_BLEU = new int[suffStatsCount_BLEU];
- for (int s = 0; s < suffStatsCount_TER; ++s) {
- stats_TER[s] = stats[s];
- }
- for (int s = 0; s < suffStatsCount_BLEU; ++s) {
- stats_BLEU[s] = stats[s + suffStatsCount_TER];
- }
+ System.arraycopy(stats, 0, stats_TER, 0, suffStatsCount_TER);
+ System.arraycopy(stats, 0 + suffStatsCount_TER, stats_BLEU, 0, suffStatsCount_BLEU);
System.out.println("---TER---");
myTER.printDetailedScore_fromStats(stats_TER, oneLiner);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/23ebc372/src/main/java/org/apache/joshua/metrics/TercomRunner.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/TercomRunner.java b/src/main/java/org/apache/joshua/metrics/TercomRunner.java
index d7eeae5..6c78eb2 100644
--- a/src/main/java/org/apache/joshua/metrics/TercomRunner.java
+++ b/src/main/java/org/apache/joshua/metrics/TercomRunner.java
@@ -27,12 +27,12 @@ import org.apache.joshua.util.StreamGobbler;
public class TercomRunner implements Runnable {
/* non-static data members */
- private Semaphore blocker;
+ private final Semaphore blocker;
- private String refFileName;
- private String hypFileName;
- private String outFileNamePrefix;
- private int memSize;
+ private final String refFileName;
+ private final String hypFileName;
+ private final String outFileNamePrefix;
+ private final int memSize;
/* static data members */
private static boolean caseSensitive;
[6/7] incubator-joshua git commit: static analysis based code
improvements on server package
Posted by to...@apache.org.
static analysis based code improvements on server package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1d012a1b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1d012a1b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1d012a1b
Branch: refs/heads/master
Commit: 1d012a1bfa40de48e8f4d3337a9659c95aed8bcf
Parents: 2c3c0ad
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Sep 26 14:02:21 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Sep 26 14:02:21 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/server/ServerThread.java | 94 +++++++++++---------
.../org/apache/joshua/server/TcpServer.java | 4 +-
2 files changed, 55 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1d012a1b/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 976e543..638c194 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -110,7 +110,7 @@ public class ServerThread extends Thread implements HttpHandler {
}
public HashMap<String, String> queryToMap(String query) throws UnsupportedEncodingException {
- HashMap<String, String> result = new HashMap<String, String>();
+ HashMap<String, String> result = new HashMap<>();
for (String param : query.split("&")) {
String pair[] = param.split("=");
if (pair.length > 1) {
@@ -185,113 +185,125 @@ public class ServerThread extends Thread implements HttpHandler {
* Processes metadata commands received in the HTTP request. Some commands result in sending data back.
*
* @param meta the metadata request
- * @return result string (for some commands)
*/
private void handleMetadata(String meta, JSONMessage message) {
String[] tokens = meta.split("\\s+", 2);
String type = tokens[0];
String args = tokens.length > 1 ? tokens[1] : "";
-
- if (type.equals("get_weight")) {
+
+ switch (type) {
+ case "get_weight":
String weight = tokens[1];
LOG.info("WEIGHT: %s = %.3f", weight, Decoder.weights.getWeight(weight));
- } else if (type.equals("set_weights")) {
+ break;
+ case "set_weights": {
// Change a decoder weight
String[] argTokens = args.split("\\s+");
for (int i = 0; i < argTokens.length; i += 2) {
String feature = argTokens[i];
- String newValue = argTokens[i+1];
+ String newValue = argTokens[i + 1];
float old_weight = Decoder.weights.getWeight(feature);
Decoder.weights.set(feature, Float.parseFloat(newValue));
- LOG.info("set_weights: {} {} -> {}", feature, old_weight, Decoder.weights.getWeight(feature));
+ LOG.info("set_weights: {} {} -> {}", feature, old_weight,
+ Decoder.weights.getWeight(feature));
}
-
+
message.addMetaData("weights " + Decoder.weights.toString());
-
- } else if (type.equals("get_weights")) {
+
+ break;
+ }
+ case "get_weights":
message.addMetaData("weights " + Decoder.weights.toString());
-
- } else if (type.equals("add_rule")) {
+
+ break;
+ case "add_rule": {
String argTokens[] = args.split(" \\|\\|\\| ");
-
+
if (argTokens.length < 3) {
LOG.error("* INVALID RULE '{}'", meta);
return;
}
-
+
String lhs = argTokens[0];
String source = argTokens[1];
String target = argTokens[2];
String featureStr = "";
String alignmentStr = "";
- if (argTokens.length > 3)
+ if (argTokens.length > 3)
featureStr = argTokens[3];
if (argTokens.length > 4)
alignmentStr = " ||| " + argTokens[4];
-
+
/* Prepend source and target side nonterminals for phrase-based decoding. Probably better
* handled in each grammar type's addRule() function.
*/
- String ruleString = (joshuaConfiguration.search_algorithm.equals("stack"))
- ? String.format("%s ||| [X,1] %s ||| [X,1] %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr)
- : String.format("%s ||| %s ||| %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr);
-
+ String ruleString = (joshuaConfiguration.search_algorithm.equals("stack")) ?
+ String
+ .format("%s ||| [X,1] %s ||| [X,1] %s ||| -1 %s %s", lhs, source, target, featureStr,
+ alignmentStr) :
+ String.format("%s ||| %s ||| %s ||| -1 %s %s", lhs, source, target, featureStr,
+ alignmentStr);
+
Rule rule = new HieroFormatReader().parseLine(ruleString);
decoder.addCustomRule(rule);
-
+
LOG.info("Added custom rule {}", rule.toString());
-
- } else if (type.equals("list_rules")) {
-
+
+ break;
+ }
+ case "list_rules":
+
LOG.info("list_rules");
-
+
// Walk the grammar trie
- ArrayList<Trie> nodes = new ArrayList<Trie>();
+ ArrayList<Trie> nodes = new ArrayList<>();
nodes.add(decoder.getCustomPhraseTable().getTrieRoot());
-
+
while (nodes.size() > 0) {
Trie trie = nodes.remove(0);
-
+
if (trie == null)
continue;
-
+
if (trie.hasRules()) {
- for (Rule rule: trie.getRuleCollection().getRules()) {
+ for (Rule rule : trie.getRuleCollection().getRules()) {
message.addRule(rule.toString());
LOG.debug("Found rule: " + rule);
}
}
-
+
if (trie.getExtensions() != null)
nodes.addAll(trie.getExtensions());
}
-
- } else if (type.equals("remove_rule")) {
-
+
+ break;
+ case "remove_rule": {
+
Rule rule = new HieroFormatReader().parseLine(args);
-
+
LOG.info("remove_rule " + rule);
-
+
Trie trie = decoder.getCustomPhraseTable().getTrieRoot();
int[] sourceTokens = rule.getFrench();
- for (int i = 0; i < sourceTokens.length; i++) {
- Trie nextTrie = trie.match(sourceTokens[i]);
+ for (int sourceToken : sourceTokens) {
+ Trie nextTrie = trie.match(sourceToken);
if (nextTrie == null)
return;
-
+
trie = nextTrie;
}
if (trie.hasRules()) {
- for (Rule ruleCand: trie.getRuleCollection().getRules()) {
+ for (Rule ruleCand : trie.getRuleCollection().getRules()) {
if (Arrays.equals(rule.getEnglish(), ruleCand.getEnglish())) {
trie.getRuleCollection().getRules().remove(ruleCand);
break;
}
}
- return;
}
+ break;
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1d012a1b/src/main/java/org/apache/joshua/server/TcpServer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/TcpServer.java b/src/main/java/org/apache/joshua/server/TcpServer.java
index e054186..f4e2bf0 100644
--- a/src/main/java/org/apache/joshua/server/TcpServer.java
+++ b/src/main/java/org/apache/joshua/server/TcpServer.java
@@ -33,8 +33,8 @@ import org.slf4j.LoggerFactory;
public class TcpServer {
private static final Logger LOG = LoggerFactory.getLogger(TcpServer.class);
private final JoshuaConfiguration joshuaConfiguration;
- private Decoder decoder;
- private int port;
+ private final Decoder decoder;
+ private final int port;
public TcpServer(Decoder decoder, int port,JoshuaConfiguration joshuaConfiguration) {
this.joshuaConfiguration = joshuaConfiguration;