You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/05/02 14:34:33 UTC
svn commit: r1591889 [9/14] - in /opennlp/trunk: opennlp-tools/lang/ml/
opennlp-tools/src/main/java/opennlp/tools/chunker/
opennlp-tools/src/main/java/opennlp/tools/cmdline/
opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/
opennlp-tools/src/m...
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java Fri May 2 12:34:23 2014
@@ -47,7 +47,7 @@ import opennlp.tools.util.model.ModelUti
* This tokenizer needs a statistical model to tokenize a text which reproduces
* the tokenization observed in the training data used to create the model.
* The {@link TokenizerModel} class encapsulates the model and provides
- * methods to create it from the binary representation.
+ * methods to create it from the binary representation.
* <p>
* A tokenizer instance is not thread safe. For each thread one tokenizer
* must be instantiated which can share one <code>TokenizerModel</code> instance
@@ -69,7 +69,7 @@ import opennlp.tools.util.model.ModelUti
* <br>
* String tokens[] = tokenizer.tokenize("A sentence to be tokenized.");
* </code>
- *
+ *
* @see Tokenizer
* @see TokenizerModel
* @see TokenSample
@@ -92,7 +92,7 @@ public class TokenizerME extends Abstrac
*/
@Deprecated
public static final Pattern alphaNumeric = Pattern.compile(Factory.DEFAULT_ALPHANUMERIC);
-
+
private final Pattern alphanumeric;
/**
@@ -147,7 +147,7 @@ public class TokenizerME extends Abstrac
newTokens = new ArrayList<Span>();
tokProbs = new ArrayList<Double>(50);
}
-
+
private static Set<String> getAbbreviations(Dictionary abbreviations) {
if(abbreviations == null) {
return Collections.<String>emptySet();
@@ -220,10 +220,10 @@ public class TokenizerME extends Abstrac
newTokens.toArray(spans);
return spans;
}
-
+
/**
* Trains a model for the {@link TokenizerME}.
- *
+ *
* @param samples
* the samples used for the training.
* @param factory
@@ -260,15 +260,15 @@ public class TokenizerME extends Abstrac
* @param samples the samples used for the training.
* @param useAlphaNumericOptimization - if true alpha numerics are skipped
* @param mlParams the machine learning train parameters
- *
+ *
* @return the trained {@link TokenizerModel}
*
* @throws IOException it throws an {@link IOException} if an {@link IOException}
* is thrown during IO operations on a temp file which is created during training.
* Or if reading from the {@link ObjectStream} fails.
- *
- * @deprecated Use
- * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
+ *
+ * @deprecated Use
+ * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
* and pass in a {@link TokenizerFactory}
*/
public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples,
@@ -276,7 +276,7 @@ public class TokenizerME extends Abstrac
return train(languageCode, samples, null, useAlphaNumericOptimization,
mlParams);
}
-
+
/**
* Trains a model for the {@link TokenizerME}.
*
@@ -285,15 +285,15 @@ public class TokenizerME extends Abstrac
* @param abbreviations an abbreviations dictionary
* @param useAlphaNumericOptimization - if true alpha numerics are skipped
* @param mlParams the machine learning train parameters
- *
+ *
* @return the trained {@link TokenizerModel}
*
* @throws IOException it throws an {@link IOException} if an {@link IOException}
* is thrown during IO operations on a temp file which is created during training.
* Or if reading from the {@link ObjectStream} fails.
- *
- * @deprecated Use
- * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
+ *
+ * @deprecated Use
+ * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
* and pass in a {@link TokenizerFactory}
*/
public static TokenizerModel train(String languageCode,
@@ -315,7 +315,7 @@ public class TokenizerME extends Abstrac
return new TokenizerModel(languageCode, maxentModel, abbreviations,
useAlphaNumericOptimization, manifestInfoEntries);
}
-
+
/**
* Trains a model for the {@link TokenizerME} with a default cutoff of 5 and 100 iterations.
@@ -328,20 +328,20 @@ public class TokenizerME extends Abstrac
*
* @throws IOException it throws an {@link IOException} if an {@link IOException}
* is thrown during IO operations on a temp file which is
- *
+ *
* @throws ObjectStreamException if reading from the {@link ObjectStream} fails
* created during training.
- *
- *
- * @deprecated Use
- * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
+ *
+ *
+ * @deprecated Use
+ * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)}
* and pass in a {@link TokenizerFactory}
*/
public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples,
boolean useAlphaNumericOptimization) throws IOException, ObjectStreamException {
return train(languageCode, samples, useAlphaNumericOptimization, ModelUtil.createDefaultTrainingParameters());
}
-
+
/**
* Returns the value of the alpha-numeric optimization flag.
*
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java Fri May 2 12:34:23 2014
@@ -46,12 +46,12 @@ import opennlp.tools.util.model.ModelUti
public final class TokenizerModel extends BaseModel {
private static final String COMPONENT_NAME = "TokenizerME";
-
+
private static final String TOKENIZER_MODEL_ENTRY = "token.model";
/**
* Initializes the current instance.
- *
+ *
* @param tokenizerModel the model
* @param manifestInfoEntries the manifest
* @param tokenizerFactory the factory
@@ -68,7 +68,7 @@ public final class TokenizerModel extend
*
* @param tokenizerMaxentModel
* @param useAlphaNumericOptimization
- *
+ *
* @deprecated Use
* {@link TokenizerModel#TokenizerModel(MaxentModel, Map, TokenizerFactory)}
* instead and pass in a {@link TokenizerFactory}.
@@ -76,7 +76,7 @@ public final class TokenizerModel extend
public TokenizerModel(String language, MaxentModel tokenizerMaxentModel,
Dictionary abbreviations, boolean useAlphaNumericOptimization,
Map<String, String> manifestInfoEntries) {
- this(tokenizerMaxentModel, manifestInfoEntries,
+ this(tokenizerMaxentModel, manifestInfoEntries,
new TokenizerFactory(language, abbreviations, useAlphaNumericOptimization, null));
}
@@ -87,7 +87,7 @@ public final class TokenizerModel extend
* @param tokenizerMaxentModel
* @param useAlphaNumericOptimization
* @param manifestInfoEntries
- *
+ *
* @deprecated Use
* {@link TokenizerModel#TokenizerModel(MaxentModel, Map, TokenizerFactory)}
* instead and pass in a {@link TokenizerFactory}.
@@ -103,7 +103,7 @@ public final class TokenizerModel extend
* @param language
* @param tokenizerMaxentModel
* @param useAlphaNumericOptimization
- *
+ *
* @deprecated Use
* {@link TokenizerModel#TokenizerModel(MaxentModel, Map, TokenizerFactory)}
* instead and pass in a {@link TokenizerFactory}.
@@ -112,7 +112,7 @@ public final class TokenizerModel extend
boolean useAlphaNumericOptimization) {
this(language, tokenizerMaxentModel, useAlphaNumericOptimization, null);
}
-
+
/**
* Initializes the current instance.
*
@@ -124,11 +124,11 @@ public final class TokenizerModel extend
public TokenizerModel(InputStream in) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, in);
}
-
+
public TokenizerModel(File modelFile) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, modelFile);
}
-
+
public TokenizerModel(URL modelURL) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, modelURL);
}
@@ -168,7 +168,7 @@ public final class TokenizerModel extend
public MaxentModel getMaxentModel() {
return (MaxentModel) artifactMap.get(TOKENIZER_MODEL_ENTRY);
}
-
+
public Dictionary getAbbreviations() {
if (getFactory() != null) {
return getFactory().getAbbreviationDictionary();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerStream.java Fri May 2 12:34:23 2014
@@ -31,7 +31,7 @@ public class TokenizerStream implements
private Tokenizer tokenizer;
private ObjectStream<String> input;
-
+
public TokenizerStream(Tokenizer tokenizer, ObjectStream<String> input) {
this.tokenizer = tokenizer;
this.input = input;
@@ -39,16 +39,16 @@ public class TokenizerStream implements
public TokenSample read() throws IOException {
String inputString = input.read();
-
+
if (inputString != null) {
Span tokens[] = tokenizer.tokenizePos(inputString);
-
+
return new TokenSample(inputString, tokens);
}
-
+
return null;
}
-
+
public void close() throws IOException {
input.close();
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java Fri May 2 12:34:23 2014
@@ -28,32 +28,32 @@ import opennlp.tools.util.Span;
* separated token strings.
*/
public class WhitespaceTokenStream extends FilterObjectStream<TokenSample, String> {
-
+
public WhitespaceTokenStream(ObjectStream<TokenSample> tokens) {
super(tokens);
}
public String read() throws IOException {
TokenSample tokenSample = samples.read();
-
+
if (tokenSample != null) {
StringBuilder whitespaceSeparatedTokenString = new StringBuilder();
-
+
for (Span token : tokenSample.getTokenSpans()) {
whitespaceSeparatedTokenString.append(
token.getCoveredText(tokenSample.getText()));
whitespaceSeparatedTokenString.append(' ');
}
-
+
// Shorten string by one to get rid of last space
if (whitespaceSeparatedTokenString.length() > 0) {
whitespaceSeparatedTokenString.setLength(
whitespaceSeparatedTokenString.length() -1 );
}
-
+
return whitespaceSeparatedTokenString.toString();
}
-
+
return null;
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java Fri May 2 12:34:23 2014
@@ -25,8 +25,8 @@ import opennlp.tools.util.StringUtil;
/**
* This tokenizer uses white spaces to tokenize the input text.
- *
- * To obtain an instance of this tokenizer use the static final
+ *
+ * To obtain an instance of this tokenizer use the static final
* <code>INSTANCE</code> field.
*/
public class WhitespaceTokenizer extends AbstractTokenizer {
@@ -65,11 +65,11 @@ public class WhitespaceTokenizer extends
}
}
}
-
+
if (inTok) {
tokens.add(new Span(tokStart, end));
}
-
+
return tokens.toArray(new Span[tokens.size()]);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java Fri May 2 12:34:23 2014
@@ -24,13 +24,13 @@ import opennlp.tools.tokenize.DefaultTok
import opennlp.tools.tokenize.TokenContextGenerator;
public class Factory {
-
+
public static final String DEFAULT_ALPHANUMERIC = "^[A-Za-z0-9]+$";
-
+
/**
* Gets the alpha numeric pattern for the language. Please save the value
* locally because this call is expensive.
- *
+ *
* @param languageCode
* the language code. If null or unknow the default pattern will be
* returned.
@@ -40,10 +40,10 @@ public class Factory {
if("pt".equals(languageCode)) {
return Pattern.compile("^[0-9a-záãâàéêíóõôúüçA-ZÁÃÂÀÉÊÍÓÕÔÚÜÇ]+$");
}
-
+
return Pattern.compile(DEFAULT_ALPHANUMERIC);
}
-
+
public TokenContextGenerator createTokenContextGenerator(String languageCode, Set<String> abbreviations) {
return new DefaultTokenContextGenerator(abbreviations);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java Fri May 2 12:34:23 2014
@@ -58,7 +58,7 @@ public abstract class AbstractEventStrea
@Override
public final Event read() throws IOException {
-
+
if (events.hasNext()) {
return events.next();
}
@@ -67,21 +67,21 @@ public abstract class AbstractEventStrea
while (!events.hasNext() && (sample = samples.read()) != null) {
events = createEvents(sample);
}
-
+
if (events.hasNext()) {
return read();
}
}
-
+
return null;
}
-
+
@Override
public void reset() throws IOException, UnsupportedOperationException {
events = Collections.emptyIterator();
samples.reset();
}
-
+
@Override
public void close() throws IOException {
samples.close();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractObjectStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractObjectStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractObjectStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/AbstractObjectStream.java Fri May 2 12:34:23 2014
@@ -26,7 +26,7 @@ public class AbstractObjectStream<T> imp
protected AbstractObjectStream(ObjectStream<T> stream) {
this.stream = stream;
}
-
+
@Override
public T read() throws IOException {
return stream.read();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java Fri May 2 12:34:23 2014
@@ -27,13 +27,13 @@ import opennlp.tools.util.model.BaseMode
/**
* Base class for all tool factories.
- *
- * Extensions of this class should:
+ *
+ * Extensions of this class should:
* <ul>
- * <li>implement an empty constructor (TODO is it necessary?)
- * <li>implement a constructor that takes the {@link ArtifactProvider} and
- * calls {@code BaseToolFactory(Map)}
- * <li>override {@link #createArtifactMap()} and
+ * <li>implement an empty constructor (TODO is it necessary?)
+ * <li>implement a constructor that takes the {@link ArtifactProvider} and
+ * calls {@code BaseToolFactory(Map)}
+ * <li>override {@link #createArtifactMap()} and
* {@link #createArtifactSerializersMap()} methods if necessary.
* </ul>
*/
@@ -78,16 +78,16 @@ public abstract class BaseToolFactory {
public Map<String, Object> createArtifactMap() {
return new HashMap<String, Object>();
}
-
+
/**
* Creates the manifest entries that will be added to the model manifest
- *
+ *
* @return the manifest entries to added to the model manifest
*/
public Map<String, String> createManifestEntries() {
return new HashMap<String, String>();
}
-
+
/**
* Validates the parsed artifacts. If something is not
* valid subclasses should throw an {@link InvalidFormatException}.
@@ -103,7 +103,7 @@ public abstract class BaseToolFactory {
public static BaseToolFactory create(String subclassName,
ArtifactProvider artifactProvider) throws InvalidFormatException {
BaseToolFactory theFactory = null;
-
+
try {
// load the ToolFactory using the default constructor
theFactory = ExtensionLoader.instantiateExtension(
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java Fri May 2 12:34:23 2014
@@ -196,10 +196,10 @@ public class BeamSearch<T> {
*/
public Sequence bestSequence(T[] sequence, Object[] additionalContext) {
Sequence sequences[] = bestSequences(1, sequence, additionalContext,zeroLog);
-
+
if (sequences.length > 0)
return sequences[0];
- else
+ else
return null;
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/CollectionObjectStream.java Fri May 2 12:34:23 2014
@@ -22,12 +22,12 @@ import java.util.Iterator;
public class CollectionObjectStream<E> implements ObjectStream<E> {
private Collection<E> collection;
-
+
private Iterator<E> iterator;
public CollectionObjectStream(Collection<E> collection) {
this.collection = collection;
-
+
reset();
}
@@ -37,11 +37,11 @@ public class CollectionObjectStream<E> i
else
return null;
}
-
+
public void reset() {
this.iterator = collection.iterator();
}
-
+
public void close() {
}
}
\ No newline at end of file
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/EventTraceStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/EventTraceStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/EventTraceStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/EventTraceStream.java Fri May 2 12:34:23 2014
@@ -25,22 +25,22 @@ import opennlp.tools.ml.model.Event;
public class EventTraceStream extends FilterObjectStream<Event, Event> {
private Writer writer;
-
+
public EventTraceStream(ObjectStream<Event> stream, Writer writer) {
super(stream);
-
+
this.writer = writer;
}
-
-
+
+
public Event read() throws IOException {
Event event = samples.read();
-
+
if (event != null) {
writer.write(event.toString());
writer.write("\n");
}
-
+
return event;
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/FilterObjectStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/FilterObjectStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/FilterObjectStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/FilterObjectStream.java Fri May 2 12:34:23 2014
@@ -22,7 +22,7 @@ import java.io.IOException;
/**
* Abstract base class for filtering {@link ObjectStream}s.
* <p>
- * Filtering streams take an existing stream and convert
+ * Filtering streams take an existing stream and convert
* its output to something else.
*
* @param <S> the type of the source/input stream
@@ -31,14 +31,14 @@ import java.io.IOException;
public abstract class FilterObjectStream<S, T> implements ObjectStream<T> {
protected final ObjectStream<S> samples;
-
+
protected FilterObjectStream(ObjectStream<S> samples) {
if (samples == null)
throw new IllegalArgumentException("samples must not be null!");
-
+
this.samples = samples;
}
-
+
public void reset() throws IOException, UnsupportedOperationException {
samples.reset();
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/HashSumEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/HashSumEventStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/HashSumEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/HashSumEventStream.java Fri May 2 12:34:23 2014
@@ -30,12 +30,12 @@ import opennlp.tools.ml.model.EventStrea
public class HashSumEventStream implements EventStream {
private final EventStream eventStream;
-
+
private MessageDigest digest;
-
+
public HashSumEventStream(EventStream eventStream) {
this.eventStream = eventStream;
-
+
try {
digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
@@ -43,41 +43,41 @@ public class HashSumEventStream implemen
throw new IllegalStateException(e);
}
}
-
+
public boolean hasNext() throws IOException {
return eventStream.hasNext();
}
public Event next() throws IOException {
-
+
Event event = eventStream.next();
-
+
try {
digest.update(event.toString().getBytes("UTF-8"));
}
catch (UnsupportedEncodingException e) {
throw new IllegalStateException(e);
}
-
+
return event;
}
-
+
/**
* Calculates the hash sum of the stream. The method must be
* called after the stream is completely consumed.
- *
+ *
* @return the hash sum
* @throws IllegalStateException if the stream is not consumed completely,
* completely means that hasNext() returns false
*/
public BigInteger calculateHashSum() {
-
+
// if (hasNext())
// throw new IllegalStateException("stream must be consumed completely!");
-
+
return new BigInteger(1, digest.digest());
}
-
+
public void remove() {
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/InvalidFormatException.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/InvalidFormatException.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/InvalidFormatException.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/InvalidFormatException.java Fri May 2 12:34:23 2014
@@ -33,12 +33,12 @@ public class InvalidFormatException exte
public InvalidFormatException(String message) {
super(message);
}
-
+
public InvalidFormatException(Throwable t) {
super();
initCause(t);
}
-
+
public InvalidFormatException(String message, Throwable t) {
super(message);
initCause(t);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java Fri May 2 12:34:23 2014
@@ -40,33 +40,33 @@ import java.io.ObjectStreamException;
* elements of the ObjectStream. In either case, users not reading the
* documentation carefully might run into unexpected behavior.</li>
* </ul>
- *
+ *
* @see ObjectStreamException
*/
public interface ObjectStream<T> {
-
+
/**
* Returns the next object. Calling this method repeatedly until it returns
- * null will return each object from the underlying source exactly once.
- *
+ * null will return each object from the underlying source exactly once.
+ *
* @return the next object or null to signal that the stream is exhausted
*/
T read() throws IOException;
-
+
/**
* Repositions the stream at the beginning and the previously seen object sequence
* will be repeated exactly. This method can be used to re-read
* the stream if multiple passes over the objects are required.
- *
+ *
* The implementation of this method is optional.
*/
void reset() throws IOException, UnsupportedOperationException;
-
+
/**
* Closes the <code>ObjectStream</code> and releases all allocated
* resources. After close was called its not allowed to call
* read or reset.
- *
+ *
* @throws IOException
*/
void close() throws IOException;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java Fri May 2 12:34:23 2014
@@ -25,99 +25,99 @@ public class ObjectStreamUtils {
/**
* Creates an {@link ObjectStream} form an array.
- *
+ *
* @param <T>
* @param array
- *
+ *
* @return the object stream over the array elements
*/
public static <T> ObjectStream<T> createObjectStream(final T... array) {
-
+
return new ObjectStream<T>() {
private int index = 0;
-
+
public T read() {
- if (index < array.length)
+ if (index < array.length)
return array[index++];
- else
+ else
return null;
}
public void reset() {
index = 0;
}
-
+
public void close() {
}
};
}
-
+
/**
* Creates an {@link ObjectStream} form a collection.
- *
+ *
* @param <T>
* @param collection
- *
+ *
* @return the object stream over the collection elements
*/
public static <T> ObjectStream<T> createObjectStream(final Collection<T> collection) {
-
+
return new ObjectStream<T>() {
-
+
private Iterator<T> iterator = collection.iterator();
-
+
public T read() {
if (iterator.hasNext())
return iterator.next();
else
return null;
}
-
+
public void reset() {
iterator = collection.iterator();
}
-
+
public void close() {
}
};
}
-
+
public static <T> ObjectStream<T> createObjectStream(final ObjectStream<T>... streams) {
-
+
for (ObjectStream<T> stream : streams) {
if (stream == null)
throw new NullPointerException("stream cannot be null");
}
-
+
return new ObjectStream<T>() {
-
+
private int streamIndex = 0;
-
+
public T read() throws IOException {
-
+
T object = null;
-
+
while (streamIndex < streams.length && object == null) {
object = streams[streamIndex].read();
-
+
if (object == null)
streamIndex++;
}
-
+
return object;
}
public void reset() throws IOException, UnsupportedOperationException {
streamIndex = 0;
-
+
for (ObjectStream<T> stream : streams) {
stream.reset();
}
}
public void close() throws IOException {
-
+
for (ObjectStream<T> stream : streams) {
stream.close();
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ParagraphStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ParagraphStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ParagraphStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ParagraphStream.java Fri May 2 12:34:23 2014
@@ -31,15 +31,15 @@ public class ParagraphStream extends Fil
}
public String read() throws IOException {
-
+
StringBuilder paragraph = new StringBuilder();
-
+
while (true) {
String line = samples.read();
-
+
// The last paragraph in the input might not
// be terminated well with a new line at the end.
-
+
if (line == null || line.equals("")) {
if (paragraph.length() > 0) {
return paragraph.toString();
@@ -48,7 +48,7 @@ public class ParagraphStream extends Fil
else {
paragraph.append(line).append('\n');
}
-
+
if (line == null)
return null;
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java Fri May 2 12:34:23 2014
@@ -37,7 +37,7 @@ public class PlainTextByLineStream imple
private final String encoding;
private InputStreamFactory inputStreamFactory;
-
+
private BufferedReader in;
public PlainTextByLineStream(InputStreamFactory inputStreamFactory, String charsetName) throws IOException {
@@ -48,7 +48,7 @@ public class PlainTextByLineStream imple
this.inputStreamFactory = inputStreamFactory;
this.channel = null;
this.encoding = charset.name();
-
+
reset();
}
@@ -115,7 +115,7 @@ public class PlainTextByLineStream imple
}
public void close() throws IOException {
-
+
if (in != null && channel == null) {
in.close();
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ResetableIterator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ResetableIterator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ResetableIterator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ResetableIterator.java Fri May 2 12:34:23 2014
@@ -24,7 +24,7 @@ import java.util.Iterator;
* This interface makes an {@link Iterator} resetable.
*/
public interface ResetableIterator<E> extends Iterator<E> {
-
+
/**
* Sets the {@link Iterator} back to the first retrieved element,
* the seen sequence of elements must be repeated.
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/SequenceCodec.java Fri May 2 12:34:23 2014
@@ -23,35 +23,35 @@ public interface SequenceCodec<T> {
/**
* Decodes a sequence of T objects into Span objects.
- *
+ *
* @param c
- *
+ *
* @return
*/
Span[] decode(List<T> c);
-
+
/**
* Encodes Span objects into a sequence of T objects.
- *
+ *
* @param names
* @param length
- *
+ *
* @return
*/
T[] encode(Span names[], int length);
-
+
/**
* Creates a sequence validator which can validate a sequence of outcomes.
- *
+ *
* @return
*/
SequenceValidator<T> createSequenceValidator();
-
+
/**
* Checks if the outcomes of the model are compatible with the codec.
- *
+ *
* @param outcomes all possible model outcomes
- *
+ *
* @return
*/
boolean areOutcomesCompatible(String[] outcomes);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java Fri May 2 12:34:23 2014
@@ -203,14 +203,14 @@ public class Span implements Comparable<
/**
* Return a copy of this span with leading and trailing white spaces removed.
- *
+ *
* @param text
* @return
*/
public Span trim(CharSequence text) {
-
+
int newStartOffset = getStart();
-
+
for (int i = getStart(); i < getEnd() && StringUtil.isWhitespace(text.charAt(i)); i++) {
newStartOffset++;
}
@@ -219,7 +219,7 @@ public class Span implements Comparable<
for (int i = getEnd(); i > getStart() && StringUtil.isWhitespace(text.charAt(i - 1)); i--) {
newEndOffset--;
}
-
+
if (newStartOffset == getStart() && newEndOffset == getEnd()) {
return this;
}
@@ -230,7 +230,7 @@ public class Span implements Comparable<
return new Span(newStartOffset, newEndOffset, getType());
}
}
-
+
/**
* Compares the specified span to the current span.
*/
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringList.java Fri May 2 12:34:23 2014
@@ -45,7 +45,7 @@ public class StringList implements Itera
/**
* Initializes the current instance.
- *
+ *
* Note: <br>
* Token Strings will be replaced by identical internal String object.
*
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java Fri May 2 12:34:23 2014
@@ -18,96 +18,96 @@
package opennlp.tools.util;
public class StringUtil {
-
+
/**
* Determines if the specified character is a whitespace.
- *
+ *
* A character is considered a whitespace when one
* of the following conditions is met:
- *
+ *
* <ul>
* <li>It is a {@link Character#isWhitespace(int)} whitespace.</li>
* <li>It is part of the Unicode Zs category ({@link Character#SPACE_SEPARATOR}).</li>
* </ul>
- *
+ *
* <code>Character.isWhitespace(int)</code> does not include no-break spaces.
* In OpenNLP no-break spaces are also considered as white spaces.
- *
+ *
* @param charCode
* @return true if white space otherwise false
*/
public static boolean isWhitespace(char charCode) {
- return Character.isWhitespace(charCode) ||
+ return Character.isWhitespace(charCode) ||
Character.getType(charCode) == Character.SPACE_SEPARATOR;
}
-
+
/**
* Determines if the specified character is a whitespace.
- *
+ *
* A character is considered a whitespace when one
* of the following conditions is met:
- *
+ *
* <ul>
* <li>It is a {@link Character#isWhitespace(int)} whitespace.</li>
* <li>It is part of the Unicode Zs category ({@link Character#SPACE_SEPARATOR}).</li>
* </ul>
- *
+ *
* <code>Character.isWhitespace(int)</code> does not include no-break spaces.
* In OpenNLP no-break spaces are also considered as white spaces.
- *
+ *
* @param charCode
* @return true if white space otherwise false
*/
public static boolean isWhitespace(int charCode) {
- return Character.isWhitespace(charCode) ||
+ return Character.isWhitespace(charCode) ||
Character.getType(charCode) == Character.SPACE_SEPARATOR;
}
-
-
+
+
/**
- * Converts to lower case independent of the current locale via
+ * Converts to lower case independent of the current locale via
* {@link Character#toLowerCase(char)} which uses mapping information
* from the UnicodeData file.
- *
+ *
* @param string
* @return lower cased String
*/
public static String toLowerCase(CharSequence string) {
-
+
char lowerCaseChars[] = new char[string.length()];
-
+
for (int i = 0; i < string.length(); i++) {
lowerCaseChars[i] = Character.toLowerCase(string.charAt(i));
}
-
+
return new String(lowerCaseChars);
}
-
+
/**
- * Converts to upper case independent of the current locale via
+ * Converts to upper case independent of the current locale via
* {@link Character#toUpperCase(char)} which uses mapping information
* from the UnicodeData file.
- *
+ *
* @param string
* @return upper cased String
*/
public static String toUpperCase(CharSequence string) {
char upperCaseChars[] = new char[string.length()];
-
+
for (int i = 0; i < string.length(); i++) {
upperCaseChars[i] = Character.toUpperCase(string.charAt(i));
}
-
+
return new String(upperCaseChars);
}
-
+
/**
* Returns <tt>true</tt> if {@link CharSequence#length()} is
* <tt>0</tt> or <tt>null</tt>.
- *
+ *
* @return <tt>true</tt> if {@link CharSequence#length()} is <tt>0</tt>, otherwise
* <tt>false</tt>
- *
+ *
* @since 1.5.1
*/
public static boolean isEmpty(CharSequence theString) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java Fri May 2 12:34:23 2014
@@ -28,21 +28,21 @@ import java.util.Properties;
import opennlp.tools.ml.EventTrainer;
public class TrainingParameters {
-
+
// TODO: are they duplicated?
public static final String ALGORITHM_PARAM = "Algorithm";
public static final String TRAINER_TYPE_PARAM = "TrainerType";
-
+
public static final String ITERATIONS_PARAM = "Iterations";
public static final String CUTOFF_PARAM = "Cutoff";
-
+
private Map<String, String> parameters = new HashMap<String, String>();
-
+
public TrainingParameters() {
}
-
+
public TrainingParameters(InputStream in) throws IOException {
-
+
Properties properties = new Properties();
properties.load(in);
@@ -50,42 +50,42 @@ public class TrainingParameters {
parameters.put((String) entry.getKey(), (String) entry.getValue());
}
}
-
+
/**
* Retrieves the training algorithm name for a given name space.
- *
+ *
* @return the name or null if not set.
*/
public String algorithm(String namespace) {
return parameters.get(namespace + "." + ALGORITHM_PARAM);
}
-
+
/**
* Retrieves the training algorithm name.
- *
+ *
* @return the name or null if not set.
*/
public String algorithm() {
return parameters.get(ALGORITHM_PARAM);
}
-
+
/**
* Retrieves a map with the training parameters which have the passed name space.
- *
+ *
* @param namespace
- *
+ *
* @return a parameter map which can be passed to the train and validate methods.
*/
public Map<String, String> getSettings(String namespace) {
-
+
Map<String, String> trainingParams = new HashMap<String, String>();
-
+
for (Map.Entry<String, String> entry : parameters.entrySet()) {
String key = entry.getKey();
if (namespace != null) {
String prefix = namespace + ".";
-
+
if (key.startsWith(prefix)) {
trainingParams.put(key.substring(prefix.length()), entry.getValue());
}
@@ -96,33 +96,33 @@ public class TrainingParameters {
}
}
}
-
+
return Collections.unmodifiableMap(trainingParams);
}
-
- /**
+
+ /**
* Retrieves all parameters without a name space.
- *
+ *
* @return the settings map
*/
public Map<String, String> getSettings() {
return getSettings(null);
}
-
+
// reduces the params to contain only the params in the name space
public TrainingParameters getParameters(String namespace) {
-
+
TrainingParameters params = new TrainingParameters();
-
+
for (Map.Entry<String, String> entry : getSettings(namespace).entrySet()) {
params.put(entry.getKey(), entry.getValue());
}
-
+
return params;
}
-
+
public void put(String namespace, String key, String value) {
-
+
if (namespace == null) {
parameters.put(key, value);
}
@@ -130,18 +130,18 @@ public class TrainingParameters {
parameters.put(namespace + "." + key, value);
}
}
-
+
public void put(String key, String value) {
put(null, key, value);
}
-
+
public void serialize(OutputStream out) throws IOException {
Properties properties = new Properties();
-
+
for (Map.Entry<String, String> entry : parameters.entrySet()) {
properties.put(entry.getKey(), entry.getValue());
}
-
+
properties.store(out, null);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TreeHeap.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TreeHeap.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TreeHeap.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/TreeHeap.java Fri May 2 12:34:23 2014
@@ -25,7 +25,7 @@ import java.util.TreeSet;
* An implementation of the Heap interface based on {@link java.util.SortedSet}.
* This implementation will not allow multiple objects which are equal to be added to the heap.
* Only use this implementation when objects in the heap can be totally ordered (no duplicates).
- *
+ *
* @deprecated not used anymore, when there is need for a heap use ListHeap instead
*/
@Deprecated
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Version.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Version.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Version.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Version.java Fri May 2 12:34:23 2014
@@ -37,11 +37,11 @@ import java.util.Properties;
public class Version {
private static final String DEV_VERSION_STRING = "0.0.0-SNAPSHOT";
-
+
public static final Version DEV_VERSION = Version.parse(DEV_VERSION_STRING);
-
+
private static final String SNAPSHOT_MARKER = "-SNAPSHOT";
-
+
private final int major;
private final int minor;
@@ -49,7 +49,7 @@ public class Version {
private final int revision;
private final boolean snapshot;
-
+
/**
* Initializes the current instance with the provided
* versions.
@@ -75,10 +75,10 @@ public class Version {
* @param revision
*/
public Version(int major, int minor, int revision) {
- this(major, minor, revision, false);
+ this(major, minor, revision, false);
}
-
+
/**
* Retrieves the major version.
*
@@ -109,7 +109,7 @@ public class Version {
public boolean isSnapshot() {
return snapshot;
}
-
+
/**
* Retrieves the version string.
*
@@ -163,7 +163,7 @@ public class Version {
}
int indexFirstDash = version.indexOf('-');
-
+
int versionEnd;
if (indexFirstDash == -1) {
versionEnd = version.length();
@@ -171,9 +171,9 @@ public class Version {
else {
versionEnd = indexFirstDash;
}
-
+
boolean snapshot = version.endsWith(SNAPSHOT_MARKER);
-
+
return new Version(Integer.parseInt(version.substring(0, indexFirstDot)),
Integer.parseInt(version.substring(indexFirstDot + 1, indexSecondDot)),
Integer.parseInt(version.substring(indexSecondDot + 1, versionEnd)), snapshot);
@@ -185,14 +185,14 @@ public class Version {
* @return the current version
*/
public static Version currentVersion() {
-
+
Properties manifest = new Properties();
-
+
// Try to read the version from the version file if it is available,
// otherwise set the version to the development version
-
+
InputStream versionIn = Version.class.getResourceAsStream("opennlp.version");
-
+
if (versionIn != null) {
try {
manifest.load(versionIn);
@@ -207,13 +207,13 @@ public class Version {
}
}
}
-
- String versionString =
+
+ String versionString =
manifest.getProperty("OpenNLP-Version", DEV_VERSION_STRING);
-
+
if (versionString.equals("${pom.version}"))
versionString = DEV_VERSION_STRING;
-
+
return Version.parse(versionString);
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java Fri May 2 12:34:23 2014
@@ -30,7 +30,7 @@ import opennlp.tools.util.ObjectStream;
* <p>
* Cross validation is used to evaluate the performance of a classifier when only
* training data is available. The training set is split into n parts
- * and the training / evaluation is performed n times on these parts.
+ * and the training / evaluation is performed n times on these parts.
* The training partition always consists of n -1 parts and one part is used for testing.
* <p>
* To use the <code>CrossValidationPartitioner</code> a client iterates over the n
@@ -47,87 +47,87 @@ public class CrossValidationPartitioner<
* @param <E>
*/
private static class TestSampleStream<E> implements ObjectStream<E> {
-
+
private ObjectStream<E> sampleStream;
-
+
private final int numberOfPartitions;
-
+
private final int testIndex;
-
+
private int index;
-
+
private boolean isPoisened;
-
+
private TestSampleStream(ObjectStream<E> sampleStream, int numberOfPartitions, int testIndex) {
this.numberOfPartitions = numberOfPartitions;
this.sampleStream = sampleStream;
this.testIndex = testIndex;
}
-
+
public E read() throws IOException {
if (isPoisened) {
throw new IllegalStateException();
}
-
+
// skip training samples
while (index % numberOfPartitions != testIndex) {
sampleStream.read();
index++;
}
-
+
index++;
-
+
return sampleStream.read();
}
-
+
/**
* Throws <code>UnsupportedOperationException</code>
*/
public void reset() {
throw new UnsupportedOperationException();
}
-
+
public void close() throws IOException {
sampleStream.close();
isPoisened = true;
}
-
+
void poison() {
isPoisened = true;
}
}
-
+
/**
* The <code>TrainingSampleStream</code> which iterates over
* all training elements.
- *
+ *
* Note:
* After the <code>TestSampleStream</code> was obtained
* the <code>TrainingSampleStream</code> must not be used
* anymore, otherwise an {@link IllegalStateException}
* is thrown.
- *
+ *
* The <code>ObjectStream</code>s must not be used anymore after the
* <code>CrossValidationPartitioner</code> was moved
* to one of next partitions. If they are called anyway
* an {@link IllegalStateException} is thrown.
- *
+ *
* @param <E>
*/
public static class TrainingSampleStream<E> implements ObjectStream<E> {
private ObjectStream<E> sampleStream;
-
+
private final int numberOfPartitions;
-
+
private final int testIndex;
-
+
private int index;
-
+
private boolean isPoisened;
-
+
private TestSampleStream<E> testSampleStream;
-
+
TrainingSampleStream(ObjectStream<E> sampleStream, int numberOfPartitions, int testIndex) {
this.numberOfPartitions = numberOfPartitions;
this.sampleStream = sampleStream;
@@ -135,20 +135,20 @@ public class CrossValidationPartitioner<
}
public E read() throws IOException {
-
+
if (testSampleStream != null || isPoisened) {
throw new IllegalStateException();
}
-
+
// If the test element is reached skip over it to not include it in
// the training data
if (index % numberOfPartitions == testIndex) {
sampleStream.read();
index++;
}
-
+
index++;
-
+
return sampleStream.read();
}
@@ -156,7 +156,7 @@ public class CrossValidationPartitioner<
* Resets the training sample. Use this if you need to collect things before
* training, for example, to collect induced abbreviations or create a POS
* Dictionary.
- *
+ *
* @throws IOException
*/
public void reset() throws IOException {
@@ -171,48 +171,48 @@ public class CrossValidationPartitioner<
sampleStream.close();
poison();
}
-
+
void poison() {
isPoisened = true;
if (testSampleStream != null)
testSampleStream.poison();
}
-
+
/**
* Retrieves the <code>ObjectStream</code> over the test/evaluations
* elements and poisons this <code>TrainingSampleStream</code>.
* From now on calls to the read method are forbidden
* and will raise an <code>IllegalStateException</code>.
- *
+ *
* @return the test sample stream
*/
public ObjectStream<E> getTestSampleStream() throws IOException {
-
+
if (isPoisened) {
throw new IllegalStateException();
}
-
+
if (testSampleStream == null) {
-
+
sampleStream.reset();
testSampleStream = new TestSampleStream<E>(sampleStream, numberOfPartitions, testIndex);
}
-
+
return testSampleStream;
}
}
-
+
/**
* An <code>ObjectStream</code> over the whole set of data samples which
* are used for the cross validation.
*/
private ObjectStream<E> sampleStream;
-
+
/**
* The number of parts the data is divided into.
*/
private final int numberOfPartitions;
-
+
/**
* The index of test part.
*/
@@ -224,10 +224,10 @@ public class CrossValidationPartitioner<
* despite the fact that it is forbidden!.
*/
private TrainingSampleStream<E> lastTrainingSampleStream;
-
+
/**
* Initializes the current instance.
- *
+ *
* @param inElements
* @param numberOfPartitions
*/
@@ -235,10 +235,10 @@ public class CrossValidationPartitioner<
this.sampleStream = inElements;
this.numberOfPartitions = numberOfPartitions;
}
-
+
/**
* Initializes the current instance.
- *
+ *
* @param elements
* @param numberOfPartitions
*/
@@ -260,23 +260,23 @@ public class CrossValidationPartitioner<
if (hasNext()) {
if (lastTrainingSampleStream != null)
lastTrainingSampleStream.poison();
-
+
sampleStream.reset();
-
+
TrainingSampleStream<E> trainingSampleStream = new TrainingSampleStream<E>(sampleStream,
numberOfPartitions, testIndex);
-
+
testIndex++;
-
+
lastTrainingSampleStream = trainingSampleStream;
-
+
return trainingSampleStream;
}
else {
throw new NoSuchElementException();
}
}
-
+
@Override
public String toString() {
return "At partition" + Integer.toString(testIndex + 1) +
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/EvaluationMonitor.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/EvaluationMonitor.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/EvaluationMonitor.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/EvaluationMonitor.java Fri May 2 12:34:23 2014
@@ -18,9 +18,9 @@
package opennlp.tools.util.eval;
public interface EvaluationMonitor<T> {
-
+
void correctlyClassified(T reference, T prediction);
-
+
void missclassified(T reference, T prediction);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java Fri May 2 12:34:23 2014
@@ -34,7 +34,7 @@ import opennlp.tools.util.ObjectStream;
public abstract class Evaluator<T> {
private List<EvaluationMonitor<T>> listeners;
-
+
public Evaluator(EvaluationMonitor<T>... aListeners) {
if (aListeners != null) {
List<EvaluationMonitor<T>> listenersList = new ArrayList<EvaluationMonitor<T>>(
@@ -49,14 +49,14 @@ public abstract class Evaluator<T> {
listeners = Collections.emptyList();
}
}
-
+
/**
* Evaluates the given reference sample object.
- *
+ *
* The implementation has to update the score after every invocation.
*
* @param reference the reference sample.
- *
+ *
* @return the predicted sample
*/
protected abstract T processSample(T reference);
@@ -64,14 +64,14 @@ public abstract class Evaluator<T> {
/**
* Evaluates the given reference object. The default implementation calls
* {@link Evaluator#processSample(Object)}
- *
+ *
* <p>
* <b>note:</b> this method will be changed to private in the future.
* Implementations should override {@link Evaluator#processSample(Object)} instead.
* If this method is overridden, the implementation has to update the score
* after every invocation.
* </p>
- *
+ *
* @param sample
* the sample to be evaluated
*/
@@ -85,11 +85,11 @@ public abstract class Evaluator<T> {
} else {
for (EvaluationMonitor<T> listener : listeners) {
listener.missclassified(sample, predicted);
- }
+ }
}
}
}
-
+
/**
* Reads all sample objects from the stream
* and evaluates each sample object with
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java Fri May 2 12:34:23 2014
@@ -32,13 +32,13 @@ public final class FMeasure {
/** |selected| = true positives + false positives <br>
* the count of selected (or retrieved) items */
private long selected;
-
+
/** |target| = true positives + false negatives <br>
* the count of target (or correct) items */
private long target;
-
+
private long truePositive;
-
+
/**
* Retrieves the arithmetic mean of the precision scores
* calculated for each evaluated sample.
@@ -58,7 +58,7 @@ public final class FMeasure {
public double getRecallScore() {
return target > 0 ? (double)truePositive / (double)target : 0;
}
-
+
/**
* Retrieves the f-measure score.
*
@@ -77,20 +77,20 @@ public final class FMeasure {
return -1;
}
}
-
+
public void updateScores(Object references[], Object predictions[]) {
-
+
truePositive += countTruePositives(references, predictions);
selected += predictions.length;
target += references.length;
}
-
+
public void mergeInto(FMeasure measure) {
this.selected += measure.selected;
this.target += measure.target;
this.truePositive += measure.truePositive;
}
-
+
/**
* Creates a human-readable {@link String} representation.
*/
@@ -100,7 +100,7 @@ public final class FMeasure {
"Recall: " + Double.toString(getRecallScore()) + "\n" +
"F-Measure: " + Double.toString(getFMeasure());
}
-
+
/**
* This method counts the number of objects which are equal and
* occur in the references and predictions arrays.
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java Fri May 2 12:34:23 2014
@@ -45,18 +45,18 @@ public class Mean {
/**
* Adds a value count times to the arithmetic mean.
- *
+ *
* @param value the value which should be added
* to the arithmetic mean.
- *
+ *
* @param count number of times the value should be added to
* arithmetic mean.
*/
public void add(double value, long count) {
sum += value * count;
- this.count += count;
+ this.count += count;
}
-
+
/**
* Retrieves the mean of all values added with
* {@link #add(double)} or 0 if there are zero added
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java Fri May 2 12:34:23 2014
@@ -27,18 +27,18 @@ import java.lang.reflect.Field;
public class ExtensionLoader {
private static boolean isOsgiAvailable = false;
-
+
private ExtensionLoader() {
}
-
+
static boolean isOSGiAvailable() {
return isOsgiAvailable;
}
-
+
static void setOSGiAvailable() {
isOsgiAvailable = true;
}
-
+
// Pass in the type (interface) of the class to load
/**
* Instantiates a user-provided extension to OpenNLP.
@@ -53,7 +53,7 @@ public class ExtensionLoader {
*
* @param clazz
* @param extensionClassName
- *
+ *
* @return
*/
// TODO: Throw custom exception if loading fails ...
@@ -63,9 +63,9 @@ public class ExtensionLoader {
// First try to load extension and instantiate extension from class path
try {
Class<?> extClazz = Class.forName(extensionClassName);
-
+
if (clazz.isAssignableFrom(extClazz)) {
-
+
try {
return (T) extClazz.newInstance();
} catch (InstantiationException e) {
@@ -99,28 +99,28 @@ public class ExtensionLoader {
} catch (ClassNotFoundException e) {
// Class is not on classpath
}
-
+
// Loading from class path failed
-
+
// Either something is wrong with the class name or OpenNLP is
// running in an OSGi environment. The extension classes are not
// on our classpath in this case.
// In OSGi we need to use services to get access to extensions.
-
+
// Determine if OSGi class is on class path
// Now load class which depends on OSGi API
if (isOsgiAvailable) {
-
+
// The OSGiExtensionLoader class will be loaded when the next line
// is executed, but not prior, and that is why it is safe to directly
// reference it here.
OSGiExtensionLoader extLoader = OSGiExtensionLoader.getInstance();
return extLoader.getExtension(clazz, extensionClassName);
}
-
- throw new ExtensionNotLoadedException("Unable to find implementation for " +
- clazz.getName() + ", the class or service " + extensionClassName +
+
+ throw new ExtensionNotLoadedException("Unable to find implementation for " +
+ clazz.getName() + ", the class or service " + extensionClassName +
" could not be located!");
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionNotLoadedException.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionNotLoadedException.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionNotLoadedException.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionNotLoadedException.java Fri May 2 12:34:23 2014
@@ -21,26 +21,26 @@ package opennlp.tools.util.ext;
* Exception indicates that an OpenNLP extension could not be loaded.
*/
public class ExtensionNotLoadedException extends RuntimeException {
-
+
private static final long serialVersionUID = 1L;
private final boolean isOSGiEnvironment;
-
+
public ExtensionNotLoadedException(String message) {
super(message);
-
+
isOSGiEnvironment = ExtensionLoader.isOSGiAvailable();
}
public ExtensionNotLoadedException(Throwable t) {
super(t);
-
+
isOSGiEnvironment = ExtensionLoader.isOSGiAvailable();
}
-
+
/**
* Indicates if OpenNLP is running in an OSGi environment or not.
- *
+ *
* @return
*/
public boolean isOSGiEnvironment() {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionServiceKeys.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionServiceKeys.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionServiceKeys.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionServiceKeys.java Fri May 2 12:34:23 2014
@@ -20,7 +20,7 @@ package opennlp.tools.util.ext;
public class ExtensionServiceKeys {
/**
- * Property key for the unique id which identifies an
+ * Property key for the unique id which identifies an
* OSGi OpenNLP extension service.
*/
public static final String ID = "OPENLP_EXTENSION_ID";
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/OSGiExtensionLoader.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/OSGiExtensionLoader.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/OSGiExtensionLoader.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/OSGiExtensionLoader.java Fri May 2 12:34:23 2014
@@ -33,13 +33,13 @@ import org.osgi.util.tracker.ServiceTrac
public class OSGiExtensionLoader implements BundleActivator {
private static OSGiExtensionLoader instance;
-
+
private BundleContext context;
-
+
public void start(BundleContext context) throws Exception {
instance = this;
this.context = context;
-
+
ExtensionLoader.setOSGiAvailable();
}
@@ -49,18 +49,18 @@ public class OSGiExtensionLoader impleme
}
/**
- * Retrieves the
- *
+ * Retrieves the
+ *
* @param clazz
* @param id
* @return
*/
<T> T getExtension(Class<T> clazz, String id) {
-
+
if (context == null) {
throw new IllegalStateException("OpenNLP Tools Bundle is not active!");
}
-
+
Filter filter;
try {
filter = FrameworkUtil.createFilter("(&(objectclass=" + clazz.getName() + ")(" +
@@ -69,15 +69,15 @@ public class OSGiExtensionLoader impleme
// Might happen when the provided IDs are invalid in some way.
throw new ExtensionNotLoadedException(e);
}
-
+
// NOTE: In 4.3 the parameters are <T, T>
ServiceTracker extensionTracker = new ServiceTracker(context, filter, null);
-
+
T extension = null;
-
+
try {
extensionTracker.open();
-
+
try {
extension = (T) extensionTracker.waitForService(30000);
} catch (InterruptedException e) {
@@ -86,11 +86,11 @@ public class OSGiExtensionLoader impleme
} finally {
extensionTracker.close();
}
-
+
if (extension == null) {
throw new ExtensionNotLoadedException("No suitable extension found. Extension name: " + id);
}
-
+
return extension;
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -34,7 +34,7 @@ import java.util.List;
* which are called from many threads and have to be thread safe.
* If that is not possible the {@link FeatureGeneratorFactory} must make a copy
* of the resource object for each feature generator instance.
- *
+ *
* @see FeatureGeneratorAdapter
* @see FeatureGeneratorFactory
*/
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -45,7 +45,7 @@ public class AggregatedFeatureGenerator
if (generator == null)
throw new IllegalArgumentException("null values in generators are not permitted!");
}
-
+
this.generators = new ArrayList<AdaptiveFeatureGenerator>(generators.length);
Collections.addAll(this.generators, generators);
@@ -56,7 +56,7 @@ public class AggregatedFeatureGenerator
public AggregatedFeatureGenerator(Collection<AdaptiveFeatureGenerator> generators) {
this(generators.toArray(new AdaptiveFeatureGenerator[generators.size()]));
}
-
+
/**
* Calls the {@link AdaptiveFeatureGenerator#clearAdaptiveData()} method
* on all aggregated {@link AdaptiveFeatureGenerator}s.
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -23,7 +23,7 @@ public class BigramNameFeatureGenerator
public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) {
String wc = FeatureGeneratorUtil.tokenFeature(tokens[index]);
- //bi-gram features
+ //bi-gram features
if (index > 0) {
features.add("pw,w="+tokens[index-1]+","+tokens[index]);
String pwc = FeatureGeneratorUtil.tokenFeature(tokens[index-1]);
@@ -31,8 +31,8 @@ public class BigramNameFeatureGenerator
}
if (index+1 < tokens.length) {
features.add("w,nw="+tokens[index]+","+tokens[index+1]);
- String nwc = FeatureGeneratorUtil.tokenFeature(tokens[index+1]);
+ String nwc = FeatureGeneratorUtil.tokenFeature(tokens[index+1]);
features.add("wc,nc="+wc+","+nwc);
}
- }
+ }
}
\ No newline at end of file
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -22,10 +22,10 @@ import java.util.Map;
import opennlp.tools.util.InvalidFormatException;
public abstract class CustomFeatureGenerator implements AdaptiveFeatureGenerator {
-
+
/**
* Initializes the Custom Feature Generator with defined properties and loaded resources.
- *
+ *
* @param properties
* @param resourceProvider
*/
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -26,7 +26,7 @@ import opennlp.tools.namefind.Dictionary
/**
* The {@link DictionaryFeatureGenerator} uses the {@link DictionaryNameFinder}
* to generate features for detected names based on the {@link InSpanGenerator}.
- *
+ *
* @see Dictionary
* @see DictionaryNameFinder
* @see InSpanGenerator
@@ -34,24 +34,24 @@ import opennlp.tools.namefind.Dictionary
public class DictionaryFeatureGenerator extends FeatureGeneratorAdapter {
private InSpanGenerator isg;
-
+
public DictionaryFeatureGenerator(Dictionary dict) {
this("",dict);
}
public DictionaryFeatureGenerator(String prefix, Dictionary dict) {
setDictionary(prefix,dict);
}
-
+
public void setDictionary(Dictionary dict) {
setDictionary("",dict);
}
-
+
public void setDictionary(String name, Dictionary dict) {
isg = new InSpanGenerator(name, new DictionaryNameFinder(dict));
}
-
+
public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) {
isg.createFeatures(features, tokens, index, previousOutcomes);
}
-
+
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -22,19 +22,19 @@ import java.util.List;
public class DocumentBeginFeatureGenerator extends FeatureGeneratorAdapter {
private String firstSentence[];
-
+
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
-
+
if (firstSentence == null) {
firstSentence = tokens;
}
-
+
if (firstSentence == tokens && index == 0) {
features.add("D=begin");
}
}
-
+
@Override
public void clearAdaptiveData() {
firstSentence = null;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FastTokenClassFeatureGenerator.java Fri May 2 12:34:23 2014
@@ -26,25 +26,25 @@ import opennlp.tools.util.StringUtil;
/**
* Generates features for the class of the token.
- *
+ *
* @deprecated Use {@link TokenClassFeatureGenerator} instead!
*/
-@Deprecated
+@Deprecated
public class FastTokenClassFeatureGenerator extends FeatureGeneratorAdapter {
private static final String TOKEN_CLASS_PREFIX = "wc";
private static final String TOKEN_AND_CLASS_PREFIX = "w&c";
private static Pattern capPeriod;
-
+
static {
capPeriod = Pattern.compile("^[A-Z]\\.$");
}
-
+
private boolean generateWordAndClassFeature;
-
-
+
+
public FastTokenClassFeatureGenerator() {
this(false);
}
@@ -53,11 +53,11 @@ public class FastTokenClassFeatureGenera
this.generateWordAndClassFeature = genearteWordAndClassFeature;
}
-
+
public static String tokenFeature(String token) {
StringPattern pattern = StringPattern.recognize(token);
-
+
String feat;
if (pattern.isAllLowerCaseLetter()) {
feat = "lc";
@@ -106,8 +106,8 @@ public class FastTokenClassFeatureGenera
return (feat);
}
-
-
+
+
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
String wordClass = tokenFeature(tokens[index]);
features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorFactory.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorFactory.java Fri May 2 12:34:23 2014
@@ -23,16 +23,16 @@ package opennlp.tools.util.featuregen;
* <p>
* <b>Note:</b><br>
* All implementing classes must be thread safe.
- *
+ *
* @see AdaptiveFeatureGenerator
* @see FeatureGeneratorResourceProvider
- *
- *
+ *
+ *
* @deprecated do not use this interface, will be removed!
*/
@Deprecated
public interface FeatureGeneratorFactory {
-
+
/**
* Constructs a new {@link AdaptiveFeatureGenerator}.
* <p>
@@ -41,9 +41,9 @@ public interface FeatureGeneratorFactory
* between multiple instances of feature generators. If that is not the
* case the implementor should make a copy of the resource object.
* All resource objects that are included in OpenNLP can be assumed to be thread safe.
- *
+ *
* @param resourceProvider provides access to resources which are needed for feature generation.
- *
+ *
* @return the newly created feature generator
*/
@Deprecated
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorResourceProvider.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorResourceProvider.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorResourceProvider.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorResourceProvider.java Fri May 2 12:34:23 2014
@@ -28,12 +28,12 @@ package opennlp.tools.util.featuregen;
* All implementing classes must be thread safe.
*/
public interface FeatureGeneratorResourceProvider {
-
+
/**
* Retrieves the resource object for the given name/identifier.
- *
+ *
* @param resourceIdentifier the identifier which names the resource.
- *
+ *
* @return the resource object
*/
Object getResource(String resourceIdentifier);