You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2016/12/26 13:34:05 UTC
[11/50] [abbrv] opennlp git commit: OPENNLP-887: Replace the Cache
class with a LinkedHashMap
OPENNLP-887: Replace the Cache class with a LinkedHashMap
This closes #9
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4da7f4c6
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4da7f4c6
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4da7f4c6
Branch: refs/heads/889
Commit: 4da7f4c643d45b57a6a94e34797481b476194336
Parents: 7d1123a
Author: smarthi <sm...@apache.org>
Authored: Mon Dec 19 12:51:54 2016 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Mon Dec 19 22:16:53 2016 +0100
----------------------------------------------------------------------
.../main/java/opennlp/tools/ml/BeamSearch.java | 10 +-
.../opennlp/tools/ngram/NGramGenerator.java | 4 +-
.../tools/parser/ChunkContextGenerator.java | 23 +-
.../postag/DefaultPOSContextGenerator.java | 15 +-
.../java/opennlp/tools/util/BeamSearch.java | 22 +-
.../src/main/java/opennlp/tools/util/Cache.java | 329 +------------------
.../util/featuregen/CachedFeatureGenerator.java | 8 +-
7 files changed, 42 insertions(+), 369 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
index 0ed5fe6..209d4af 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java
@@ -47,7 +47,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
protected MaxentModel model;
private double[] probs;
- private Cache contextsCache;
+ private Cache<String[], double[]> contextsCache;
private static final int zeroLog = -100000;
/**
@@ -66,7 +66,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
this.model = model;
if (cacheSize > 0) {
- contextsCache = new Cache(cacheSize);
+ contextsCache = new Cache<>(cacheSize);
}
this.probs = new double[model.getNumOutcomes()];
@@ -102,7 +102,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
String[] contexts = cg.getContext(i, sequence, outcomes, additionalContext);
double[] scores;
if (contextsCache != null) {
- scores = (double[]) contextsCache.get(contexts);
+ scores = contextsCache.get(contexts);
if (scores == null) {
scores = model.eval(contexts, probs);
contextsCache.put(contexts,scores);
@@ -113,9 +113,7 @@ public class BeamSearch<T> implements SequenceClassificationModel<T> {
}
double[] temp_scores = new double[scores.length];
- for (int c = 0; c < scores.length; c++) {
- temp_scores[c] = scores[c];
- }
+ System.arraycopy(scores, 0, temp_scores, 0, scores.length);
Arrays.sort(temp_scores);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java
index 7e05a93..f001ba2 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java
@@ -37,7 +37,7 @@ public class NGramGenerator {
*/
public static List<String> generate(List<String> input, int n, String separator) {
- List<String> outGrams = new ArrayList<String>();
+ List<String> outGrams = new ArrayList<>();
for (int i = 0; i < input.size() - (n - 2); i++) {
String gram = "";
if ((i + n) <= input.size()) {
@@ -59,7 +59,7 @@ public class NGramGenerator {
*/
public static List<String> generate(char[] input, int n, String separator) {
- List<String> outGrams = new ArrayList<String>();
+ List<String> outGrams = new ArrayList<>();
for (int i = 0; i < input.length - (n - 2); i++) {
String gram = "";
if ((i + n) <= input.length) {
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
index 3619c57..7471b3c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
@@ -29,7 +29,7 @@ import opennlp.tools.util.Cache;
public class ChunkContextGenerator implements ChunkerContextGenerator {
private static final String EOS = "eos";
- private Cache contextsCache;
+ private Cache<String, String[]> contextsCache;
private Object wordsKey;
@@ -40,13 +40,13 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
public ChunkContextGenerator(int cacheSize) {
super();
if (cacheSize > 0) {
- contextsCache = new Cache(cacheSize);
+ contextsCache = new Cache<>(cacheSize);
}
}
public String[] getContext(Object o) {
Object[] data = (Object[]) o;
- return getContext(((Integer) data[0]).intValue(), (String[]) data[1], (String[]) data[2], (String[]) data[3]);
+ return getContext((Integer) data[0], (String[]) data[1], (String[]) data[2], (String[]) data[3]);
}
public String[] getContext(int i, String[] words, String[] prevDecisions, Object[] ac) {
@@ -54,12 +54,11 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
}
public String[] getContext(int i, String[] words, String[] tags, String[] preds) {
- List<String> features = new ArrayList<String>(19);
- int x0 = i;
- int x_2 = x0 - 2;
- int x_1 = x0 - 1;
- int x2 = x0 + 2;
- int x1 = x0 + 1;
+ List<String> features = new ArrayList<>(19);
+ int x_2 = i - 2;
+ int x_1 = i - 1;
+ int x2 = i + 2;
+ int x1 = i + 1;
String w_2,w_1,w0,w1,w2;
String t_2,t_1,t0,t1,t2;
@@ -90,8 +89,8 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
}
// chunkandpostag(0)
- t0=tags[x0];
- w0=words[x0];
+ t0=tags[i];
+ w0=words[i];
// chunkandpostag(1)
if (x1 < tags.length) {
@@ -113,7 +112,7 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
w2=EOS;
}
- String cacheKey = x0+t_2+t1+t0+t1+t2+p_2+p_1;
+ String cacheKey = i +t_2+t1+t0+t1+t2+p_2+p_1;
if (contextsCache!= null) {
if (wordsKey == words) {
String[] contexts = (String[]) contextsCache.get(cacheKey);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
index e570c89..581fed5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
@@ -39,7 +39,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
private static Pattern hasCap = Pattern.compile("[A-Z]");
private static Pattern hasNum = Pattern.compile("[0-9]");
- private Cache contextsCache;
+ private Cache<String, String[]> contextsCache;
private Object wordsKey;
private Dictionary dict;
@@ -64,12 +64,12 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
this.dict = dict;
dictGram = new String[1];
if (cacheSize > 0) {
- contextsCache = new Cache(cacheSize);
+ contextsCache = new Cache<>(cacheSize);
}
}
protected static String[] getPrefixes(String lex) {
String[] prefs = new String[PREFIX_LENGTH];
- for (int li = 0, ll = PREFIX_LENGTH; li < ll; li++) {
+ for (int li = 0; li < PREFIX_LENGTH; li++) {
prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
}
return prefs;
@@ -77,7 +77,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
protected static String[] getSuffixes(String lex) {
String[] suffs = new String[SUFFIX_LENGTH];
- for (int li = 0, ll = SUFFIX_LENGTH; li < ll; li++) {
+ for (int li = 0; li < SUFFIX_LENGTH; li++) {
suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
}
return suffs;
@@ -95,10 +95,9 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
* @return The context for making a pos tag decision at the specified token index given the specified tokens and previous tags.
*/
public String[] getContext(int index, Object[] tokens, String[] tags) {
- String next, nextnext, lex, prev, prevprev;
+ String next, nextnext = null, lex, prev, prevprev = null;
String tagprev, tagprevprev;
tagprev = tagprevprev = null;
- next = nextnext = lex = prev = prevprev = null;
lex = tokens[index].toString();
if (tokens.length > index + 1) {
@@ -131,7 +130,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
String cacheKey = index+tagprev+tagprevprev;
if (contextsCache != null) {
if (wordsKey == tokens){
- String[] cachedContexts = (String[]) contextsCache.get(cacheKey);
+ String[] cachedContexts = contextsCache.get(cacheKey);
if (cachedContexts != null) {
return cachedContexts;
}
@@ -141,7 +140,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
wordsKey = tokens;
}
}
- List<String> e = new ArrayList<String>();
+ List<String> e = new ArrayList<>();
e.add("default");
// add the word itself
e.add("w=" + lex);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
index 8c460ea..95cbea9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
@@ -44,7 +44,7 @@ public class BeamSearch<T> {
private SequenceValidator<T> validator;
private double[] probs;
- private Cache contextsCache;
+ private Cache<String[], double[]> contextsCache;
private static final int zeroLog = -100000;
/**
@@ -72,7 +72,7 @@ public class BeamSearch<T> {
this.validator = validator;
if (cacheSize > 0) {
- contextsCache = new Cache(cacheSize);
+ contextsCache = new Cache<>(cacheSize);
}
this.probs = new double[model.getNumOutcomes()];
@@ -86,13 +86,7 @@ public class BeamSearch<T> {
* @see SequenceValidator
*/
private boolean validSequence(int i, T[] inputSequence, String[] outcomesSequence, String outcome) {
-
- if (validator != null) {
- return validator.validSequence(i, inputSequence, outcomesSequence, outcome);
- }
- else {
- return true;
- }
+ return validator == null || validator.validSequence(i, inputSequence, outcomesSequence, outcome);
}
public Sequence[] bestSequences(int numSequences, T[] sequence, Object[] additionalContext) {
@@ -110,8 +104,8 @@ public class BeamSearch<T> {
*/
public Sequence[] bestSequences(int numSequences, T[] sequence, Object[] additionalContext, double minSequenceScore) {
- Heap<Sequence> prev = new ListHeap<Sequence>(size);
- Heap<Sequence> next = new ListHeap<Sequence>(size);
+ Heap<Sequence> prev = new ListHeap<>(size);
+ Heap<Sequence> next = new ListHeap<>(size);
Heap<Sequence> tmp;
prev.add(new Sequence());
@@ -129,7 +123,7 @@ public class BeamSearch<T> {
String[] contexts = cg.getContext(i, sequence, outcomes, additionalContext);
double[] scores;
if (contextsCache != null) {
- scores = (double[]) contextsCache.get(contexts);
+ scores = contextsCache.get(contexts);
if (scores == null) {
scores = model.eval(contexts, probs);
contextsCache.put(contexts,scores);
@@ -140,9 +134,7 @@ public class BeamSearch<T> {
}
double[] temp_scores = new double[scores.length];
- for (int c = 0; c < scores.length; c++) {
- temp_scores[c] = scores[c];
- }
+ System.arraycopy(scores, 0, temp_scores, 0, scores.length);
Arrays.sort(temp_scores);
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java b/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java
index 5ae82fd..fb4d6cb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/Cache.java
@@ -17,337 +17,22 @@
package opennlp.tools.util;
-import java.util.Collection;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.Map;
-import java.util.Set;
/**
* Provides fixed size, pre-allocated, least recently used replacement cache.
*/
-@SuppressWarnings("unchecked")
-public class Cache implements Map {
+public class Cache<K,V> extends LinkedHashMap<K,V> {
- /** The element in the linked list which was most recently used. **/
- private DoubleLinkedListElement first;
- /** The element in the linked list which was least recently used. **/
- private DoubleLinkedListElement last;
- /** Temporary holder of the key of the least-recently-used element. */
- private Object lastKey;
- /** Temporary value used in swap. */
- private ObjectWrapper temp;
- /** Holds the object wrappers which the keys are mapped to. */
- private ObjectWrapper[] wrappers;
- /** Map which stores the keys and values of the cache. */
- private Map map;
- /** The size of the cache. */
- private int size;
+ private int capacity;
- /**
- * Creates a new cache of the specified size.
- * @param size The size of the cache.
- */
- public Cache(int size) {
- map = new HashMap(size);
- wrappers = new ObjectWrapper[size];
- this.size=size;
- Object o = new Object();
- first = new DoubleLinkedListElement(null, null, o);
- map.put(o, new ObjectWrapper(null, first));
- wrappers[0] = new ObjectWrapper(null, first);
-
- DoubleLinkedListElement e = first;
- for(int i=1; i<size; i++) {
- o = new Object();
- e = new DoubleLinkedListElement(e, null, o);
- wrappers[i] = new ObjectWrapper(null, e);
- map.put(o, wrappers[i]);
- e.prev.next = e;
- }
- last = e;
- }
-
- public void clear() {
- map.clear();
- DoubleLinkedListElement e = first;
- for (int oi=0;oi<size;oi++) {
- wrappers[oi].object=null;
- Object o = new Object();
- map.put(o,wrappers[oi]);
- e.object = o;
- e = e.next;
- }
- }
-
- public Object put(Object key, Object value) {
- ObjectWrapper o = (ObjectWrapper) map.get(key);
- if (o != null) {
- /*
- * this should never be the case, we only do a put on a cache miss which
- * means the current value wasn't in the cache. However if the user screws
- * up or wants to use this as a fixed size hash and puts the same thing in
- * the list twice then we update the value and more the key to the front of the
- * most recently used list.
- */
-
- // Move o's partner in the list to front
- DoubleLinkedListElement e = o.listItem;
-
- //move to front
- if (e != first) {
- //remove list item
- e.prev.next = e.next;
- if (e.next != null) {
- e.next.prev = e.prev;
- }
- else { //were moving last
- last = e.prev;
- }
-
- //put list item in front
- e.next = first;
- first.prev = e;
- e.prev = null;
-
- //update first
- first = e;
- }
- return o.object;
- }
- // Put o in the front and remove the last one
- lastKey = last.object; // store key to remove from hash later
- last.object = key; //update list element with new key
-
- // connect list item to front of list
- last.next = first;
- first.prev = last;
-
- // update first and last value
- first = last;
- last = last.prev;
- first.prev = null;
- last.next = null;
-
- // remove old value from cache
- temp = (ObjectWrapper) map.remove(lastKey);
- //update wrapper
- temp.object = value;
- temp.listItem = first;
-
- map.put(key, temp);
- return null;
- }
-
- public Object get(Object key) {
- ObjectWrapper o = (ObjectWrapper) map.get(key);
- if (o != null) {
- // Move it to the front
- DoubleLinkedListElement e = o.listItem;
-
- //move to front
- if (e != first) {
- //remove list item
- e.prev.next = e.next;
- if (e.next != null) {
- e.next.prev = e.prev;
- }
- else { //were moving last
- last = e.prev;
- }
- //put list item in front
- e.next = first;
- first.prev = e;
- e.prev = null;
-
- //update first
- first = e;
- }
- return o.object;
- }
- else {
- return null;
- }
- }
-
-
- public boolean containsKey(Object key) {
- return map.containsKey(key);
- }
-
- public boolean containsValue(Object value) {
- return map.containsValue(value);
- }
-
- public Set entrySet() {
- return map.entrySet();
- }
-
- public boolean isEmpty() {
- return map.isEmpty();
- }
-
- public Set keySet() {
- return map.keySet();
- }
-
- public void putAll(Map t) {
- map.putAll(t);
- }
-
- public Object remove(Object key) {
- return map.remove(key);
- }
-
- public int size() {
- return map.size();
- }
-
- public Collection values() {
- return map.values();
- }
-}
-
-class ObjectWrapper {
-
- public Object object;
- public DoubleLinkedListElement listItem;
-
- public ObjectWrapper(Object o,DoubleLinkedListElement li) {
- object = o;
- listItem = li;
- }
-
- public Object getObject() {
- return object;
- }
-
- public DoubleLinkedListElement getListItem() {
- return listItem;
- }
-
- public void setObject(Object o) {
- object = o;
- }
-
- public void setListItem(DoubleLinkedListElement li) {
- listItem = li;
- }
-
- public boolean eqauls(Object o) {
- return object.equals(o);
- }
-}
-
-class DoubleLinkedListElement {
-
- public DoubleLinkedListElement prev;
- public DoubleLinkedListElement next;
- public Object object;
-
- public DoubleLinkedListElement(DoubleLinkedListElement p,
- DoubleLinkedListElement n,
- Object o) {
- prev = p;
- next = n;
- object = o;
-
- if (p != null) {
- p.next = this;
- }
-
- if (n != null) {
- n.prev = this;
- }
- }
-}
-
-class DoubleLinkedList {
-
- DoubleLinkedListElement first;
- DoubleLinkedListElement last;
- DoubleLinkedListElement current;
-
- public DoubleLinkedList() {
- first = null;
- last = null;
- current = null;
- }
-
- public void addFirst(Object o) {
- first = new DoubleLinkedListElement(null, first, o);
-
- if (current.next == null) {
- last = current;
- }
- }
-
- public void addLast(Object o) {
- last = new DoubleLinkedListElement(last, null, o);
-
- if (current.prev == null) {
- first = current;
- }
- }
-
- public void insert(Object o) {
- if (current == null) {
- current = new DoubleLinkedListElement(null, null, o);
- }
- else {
- current = new DoubleLinkedListElement(current.prev, current, o);
- }
-
- if (current.prev == null) {
- first = current;
- }
-
- if (current.next == null) {
- last = current;
- }
- }
-
- public DoubleLinkedListElement getFirst() {
- current = first;
- return first;
- }
-
- public DoubleLinkedListElement getLast() {
- current = last;
- return last;
- }
-
- public DoubleLinkedListElement getCurrent() {
- return current;
- }
-
- public DoubleLinkedListElement next() {
- if (current.next != null) {
- current = current.next;
- }
- return current;
- }
-
- public DoubleLinkedListElement prev() {
- if (current.prev != null) {
- current = current.prev;
- }
- return current;
+ public Cache(final int capacity) {
+ this.capacity = capacity;
}
@Override
- public String toString() {
- DoubleLinkedListElement e = first;
- String s = "[" + e.object.toString();
-
- e = e.next;
-
- while (e != null) {
- s = s + ", " + e.object.toString();
- e = e.next;
- }
-
- s = s + "]";
-
- return s;
+ protected boolean removeEldestEntry(Map.Entry<K,V> eldest) {
+ return this.size() > this.capacity;
}
}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4da7f4c6/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java
index 2bdec5b..afb0a2c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java
@@ -32,14 +32,14 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator {
private String[] prevTokens;
- private Cache contextsCache;
+ private Cache<Integer, List<String>> contextsCache;
private long numberOfCacheHits;
private long numberOfCacheMisses;
public CachedFeatureGenerator(AdaptiveFeatureGenerator... generators) {
this.generator = new AggregatedFeatureGenerator(generators);
- contextsCache = new Cache(100);
+ contextsCache = new Cache<>(100);
}
@SuppressWarnings("unchecked")
@@ -49,7 +49,7 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator {
List<String> cacheFeatures;
if (tokens == prevTokens) {
- cacheFeatures = (List<String>) contextsCache.get(index);
+ cacheFeatures = contextsCache.get(index);
if (cacheFeatures != null) {
numberOfCacheHits++;
@@ -62,7 +62,7 @@ public class CachedFeatureGenerator implements AdaptiveFeatureGenerator {
prevTokens = tokens;
}
- cacheFeatures = new ArrayList<String>();
+ cacheFeatures = new ArrayList<>();
numberOfCacheMisses++;