You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2023/01/02 14:04:50 UTC
[opennlp] branch main updated: OPENNLP-1431 Enhance JavaDoc in opennlp.tools.dictionary and opennlp.tools.entitylinker packages (#477)
This is an automated email from the ASF dual-hosted git repository.
jzemerick pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 2ae1782d OPENNLP-1431 Enhance JavaDoc in opennlp.tools.dictionary and opennlp.tools.entitylinker packages (#477)
2ae1782d is described below
commit 2ae1782d66cd27e225aca68c7b6dafbdf31c33c9
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Mon Jan 2 15:04:45 2023 +0100
OPENNLP-1431 Enhance JavaDoc in opennlp.tools.dictionary and opennlp.tools.entitylinker packages (#477)
- adds missing JavaDoc
- improves existing documentation for clarity
- removes superfluous text
- adds 'final' modifier where useful and applicable
- adds 'Override' annotation where useful and applicable
- adds `package-info.java` file to entitylinker package
- fixes some typos
---
.../java/opennlp/tools/dictionary/Dictionary.java | 95 +++++++++++----------
.../main/java/opennlp/tools/dictionary/Index.java | 18 ++--
.../tools/dictionary/serializer/Attributes.java | 22 ++---
.../serializer/DictionaryEntryPersistor.java | 69 ++++++++-------
.../opennlp/tools/dictionary/serializer/Entry.java | 22 ++---
.../tools/dictionary/serializer/EntryInserter.java | 6 +-
.../java/opennlp/tools/entitylinker/BaseLink.java | 55 ++++--------
.../opennlp/tools/entitylinker/EntityLinker.java | 80 ++++++++++--------
.../tools/entitylinker/EntityLinkerFactory.java | 47 ++++++-----
.../tools/entitylinker/EntityLinkerProperties.java | 30 ++++---
.../opennlp/tools/entitylinker/LinkedSpan.java | 97 ++++++++++++++++------
.../package-info.java} | 18 +---
12 files changed, 302 insertions(+), 257 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
index a7684b13..f3333d4d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
@@ -37,7 +37,10 @@ import opennlp.tools.util.model.DictionarySerializer;
import opennlp.tools.util.model.SerializableArtifact;
/**
- * This class is a dictionary.
+ * An iterable and serializable dictionary implementation.
+ *
+ * @see SerializableArtifact
+ * @see Iterable
*/
public class Dictionary implements Iterable<StringList>, SerializableArtifact {
@@ -90,19 +93,24 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
}
}
- private Set<StringListWrapper> entrySet = new HashSet<>();
+ private final Set<StringListWrapper> entrySet = new HashSet<>();
private final boolean isCaseSensitive;
private int minTokenCount = 99999;
private int maxTokenCount = 0;
-
/**
* Initializes an empty {@link Dictionary}.
+ * By default, the resulting instance will not be case-sensitive.
*/
public Dictionary() {
this(false);
}
+ /**
+ * Initializes an empty {@link Dictionary}.
+ *
+ * @param caseSensitive Whether the new instance will operate case-sensitive, or not.
+ */
public Dictionary(boolean caseSensitive) {
isCaseSensitive = caseSensitive;
}
@@ -110,8 +118,9 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
/**
* Initializes the {@link Dictionary} from an existing dictionary resource.
*
- * @param in {@link InputStream}
- * @throws IOException
+ * @param in The {@link InputStream} that references the dictionary content.
+ *
+ * @throws IOException Thrown if IO errors occurred.
*/
public Dictionary(InputStream in) throws IOException {
isCaseSensitive = DictionaryEntryPersistor.create(in, entry -> put(entry.getTokens()));
@@ -128,18 +137,10 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
maxTokenCount = StrictMath.max(maxTokenCount, tokens.size());
}
- /**
- *
- * @return minimum token count in the dictionary
- */
public int getMinTokenCount() {
return minTokenCount;
}
- /**
- *
- * @return maximum token count in the dictionary
- */
public int getMaxTokenCount() {
return maxTokenCount;
}
@@ -147,8 +148,8 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
/**
* Checks if this dictionary has the given entry.
*
- * @param tokens query
- * @return true if it contains the entry otherwise false
+ * @param tokens The query of tokens to be checked for.
+ * @return {@code true} if it contains the entry, {@code false} otherwise.
*/
public boolean contains(StringList tokens) {
return entrySet.contains(new StringListWrapper(tokens));
@@ -157,30 +158,32 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
/**
* Removes the given tokens from the current instance.
*
- * @param tokens filter tokens
+ * @param tokens The tokens to be filtered out (= removed).
*/
public void remove(StringList tokens) {
entrySet.remove(new StringListWrapper(tokens));
}
/**
- * Retrieves an Iterator over all tokens.
- *
- * @return token-{@link Iterator}
+ * @return Retrieves a token-{@link Iterator} over all elements.
*/
+ @Override
public Iterator<StringList> iterator() {
final Iterator<StringListWrapper> entries = entrySet.iterator();
- return new Iterator<StringList>() {
+ return new Iterator<>() {
+ @Override
public boolean hasNext() {
return entries.hasNext();
}
+ @Override
public StringList next() {
return entries.next().getStringList();
}
+ @Override
public void remove() {
entries.remove();
}
@@ -188,9 +191,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
}
/**
- * Retrieves the number of tokens in the current instance.
- *
- * @return number of tokens
+ * @return Retrieves the number of tokens in the current instance.
*/
public int size() {
return entrySet.size();
@@ -199,18 +200,20 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
/**
* Writes the current instance to the given {@link OutputStream}.
*
- * @param out {@link OutputStream}
- * @throws IOException
+ * @param out A valid {@link OutputStream}, ready for serialization.
+ * @throws IOException Thrown if IO errors occurred.
*/
public void serialize(OutputStream out) throws IOException {
- Iterator<Entry> entryIterator = new Iterator<Entry>() {
- private Iterator<StringList> dictionaryIterator = Dictionary.this.iterator();
+ Iterator<Entry> entryIterator = new Iterator<>() {
+ private final Iterator<StringList> dictionaryIterator = Dictionary.this.iterator();
+ @Override
public boolean hasNext() {
return dictionaryIterator.hasNext();
}
+ @Override
public Entry next() {
StringList tokens = dictionaryIterator.next();
@@ -218,6 +221,7 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
return new Entry(tokens, new Attributes());
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -258,12 +262,12 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
}
/**
- * Reads a dictionary which has one entry per line. The tokens inside an
- * entry are whitespace delimited.
+ * Reads a {@link Dictionary} which has one entry per line.
+ * The tokens inside an entry are whitespace delimited.
*
- * @param in {@link Reader}
- * @return the parsed dictionary
- * @throws IOException
+ * @param in A {@link Reader} instance used to parse the dictionary from.
+ * @return The parsed {@link Dictionary} instance; guaranteed to be non-{@code null}.
+ * @throws IOException Thrown if IO errors occurred during read and parse operations.
*/
public static Dictionary parseOneEntryPerLine(Reader in) throws IOException {
BufferedReader lineReader = new BufferedReader(in);
@@ -291,31 +295,33 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
}
/**
- * Gets this dictionary as a {@code Set<String>}. Only {@code iterator()},
- * {@code size()} and {@code contains(Object)} methods are implemented.
- *
+ * Converts this {@link Dictionary} to a {@code Set<String>}.
+ * <p>
+ * Note: Only {@link AbstractSet#iterator()}, {@link AbstractSet#size()} and
+ * {@link AbstractSet#contains(Object)} methods are implemented.
+ * <p>
* If this dictionary entries are multi tokens only the first token of the
- * entry will be part of the Set.
+ * entry will be part of the {@link Set}.
*
- * @return a Set containing the entries of this dictionary
+ * @return A {@link Set} containing all entries of this {@link Dictionary}.
*/
public Set<String> asStringSet() {
- return new AbstractSet<String>() {
+ return new AbstractSet<>() {
@Override
public Iterator<String> iterator() {
final Iterator<StringListWrapper> entries = entrySet.iterator();
- return new Iterator<String>() {
-
+ return new Iterator<>() {
+ @Override
public boolean hasNext() {
return entries.hasNext();
}
-
+ @Override
public String next() {
return entries.next().getStringList().getToken(0);
}
-
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -344,8 +350,9 @@ public class Dictionary implements Iterable<StringList>, SerializableArtifact {
}
/**
- * Gets the Serializer Class for {@link Dictionary}
- * @return {@link DictionarySerializer}
+ * @return Retrieves the serializer class for {@link Dictionary}
+ *
+ * @see DictionarySerializer
*/
@Override
public Class<?> getArtifactSerializerClass() {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Index.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Index.java
index 7d1245ef..18f8f952 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Index.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Index.java
@@ -24,24 +24,23 @@ import java.util.Set;
import opennlp.tools.util.StringList;
/**
- * This classes indexes {@link StringList}s. This makes it possible
+ * This class indexes {@link StringList string lists}. This makes it possible
* to check if a certain token is contained in at least one of the
* {@link StringList}s.
*/
public class Index {
- private Set<String> tokens = new HashSet<>();
+ private final Set<String> tokens = new HashSet<>();
/**
- * Initializes the current instance with the given
- * {@link StringList} {@link Iterator}.
+ * Initializes an {@link Index} with the given {@link Iterator}
+ * over {@link StringList} elements.
*
- * @param tokenLists
+ * @param tokenLists The iterable {@link StringList} elements.
*/
public Index(Iterator<StringList> tokenLists) {
while (tokenLists.hasNext()) {
-
StringList tokens = tokenLists.next();
for (int i = 0; i < tokens.size(); i++) {
@@ -51,12 +50,11 @@ public class Index {
}
/**
- * Checks if at leat one {@link StringList} contains the
- * given token.
+ * Checks if at least one {@link StringList} contains the specified {@code token}.
*
- * @param token
+ * @param token The element to check for.
*
- * @return true if the token is contained otherwise false.
+ * @return {@code true} if the token is contained, {@code false} otherwise.
*/
public boolean contains(String token) {
return tokens.contains(token);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Attributes.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Attributes.java
index cc373b30..d416dfa5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Attributes.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Attributes.java
@@ -24,20 +24,20 @@ import java.util.Objects;
/**
* The {@link Attributes} class stores name value pairs.
- *
- * Problem: The HashMap for storing the name value pairs has a very high
- * memory footprint, replace it.
+ * <p>
+ * Problem: Using a {@link HashMap} to store the name-value pairs results
+ * in a very high memory footprint; it should be replaced.
*/
public class Attributes {
- private Map<String, String> mNameValueMap = new HashMap<>();
+ private final Map<String, String> mNameValueMap = new HashMap<>();
/**
- * Retrieves the value for the given key or null if attribute it not set.
+ * Retrieves the value for the given key or {@code null} if the attribute is not set.
*
- * @param key
+ * @param key The key to get the desired value for.
*
- * @return the value
+ * @return The value for the given key or {@code null}
*/
public String getValue(String key) {
return mNameValueMap.get(key);
@@ -46,8 +46,10 @@ public class Attributes {
/**
* Sets a key/value pair.
*
- * @param key
- * @param value
+ * @param key The key that uniquely identifies the specified {@code value}.
+ * Must not be {@code null}.
+ * @param value The value that shall be retrievable via its {@code key}.
+ * Must not be {@code null}.
*/
public void setValue(String key, String value) {
Objects.requireNonNull(key, "key must not be null");
@@ -59,7 +61,7 @@ public class Attributes {
/**
* Iterates over the keys.
*
- * @return key-{@link Iterator}
+ * @return Retrieves a key-based {@link Iterator}.
*/
public Iterator<String> iterator() {
return mNameValueMap.keySet().iterator();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index 603afade..90ba6665 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -40,28 +40,30 @@ import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.XMLReaderFactory;
+import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
import opennlp.tools.util.model.UncloseableInputStream;
/**
- * This class is used by for reading and writing dictionaries of all kinds.
+ * A persistor used for reading and writing {@link Dictionary dictionaries}
+ * of all kinds.
+ *
+ * @see Dictionary
*/
public class DictionaryEntryPersistor {
// TODO: should check for invalid format, make it safe
private static class DictionaryContenthandler implements ContentHandler {
- private EntryInserter mInserter;
+ private final EntryInserter mInserter;
- // private boolean mIsInsideDictionaryElement;
- // private boolean mIsInsideEntryElement;
private boolean mIsInsideTokenElement;
private boolean mIsCaseSensitiveDictionary;
- private List<String> mTokenList = new LinkedList<>();
+ private final List<String> mTokenList = new LinkedList<>();
- private StringBuilder token = new StringBuilder();
+ private final StringBuilder token = new StringBuilder();
private Attributes mAttributes;
@@ -73,6 +75,7 @@ public class DictionaryEntryPersistor {
/**
* Not implemented.
*/
+ @Override
public void processingInstruction(String target, String data)
throws SAXException {
}
@@ -80,9 +83,11 @@ public class DictionaryEntryPersistor {
/**
* Not implemented.
*/
+ @Override
public void startDocument() throws SAXException {
}
+ @Override
public void startElement(String uri, String localName, String qName,
org.xml.sax.Attributes atts) throws SAXException {
if (DICTIONARY_ELEMENT.equals(localName)) {
@@ -111,6 +116,7 @@ public class DictionaryEntryPersistor {
}
}
+ @Override
public void characters(char[] ch, int start, int length)
throws SAXException {
if (mIsInsideTokenElement) {
@@ -122,6 +128,7 @@ public class DictionaryEntryPersistor {
* Creates the Profile object after processing is complete
* and switches mIsInsideNgramElement flag.
*/
+ @Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
@@ -151,18 +158,21 @@ public class DictionaryEntryPersistor {
/**
* Not implemented.
*/
+ @Override
public void endDocument() throws SAXException {
}
/**
* Not implemented.
*/
+ @Override
public void endPrefixMapping(String prefix) throws SAXException {
}
/**
* Not implemented.
*/
+ @Override
public void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException {
}
@@ -170,18 +180,21 @@ public class DictionaryEntryPersistor {
/**
* Not implemented.
*/
+ @Override
public void setDocumentLocator(Locator locator) {
}
/**
* Not implemented.
*/
+ @Override
public void skippedEntity(String name) throws SAXException {
}
/**
* Not implemented.
*/
+ @Override
public void startPrefixMapping(String prefix, String uri)
throws SAXException {
}
@@ -198,16 +211,17 @@ public class DictionaryEntryPersistor {
/**
* Creates {@link Entry}s from the given {@link InputStream} and
* forwards these {@link Entry}s to the {@link EntryInserter}.
- *
+ * <p>
+ * <b>Note:</b>
* After creation is finished the provided {@link InputStream} is closed.
*
- * @param in stream to read entries from
+ * @param in The open {@link InputStream} to read entries from.
* @param inserter inserter to forward entries to
*
- * @return isCaseSensitive attribute for Dictionary
+ * @return The {@code isCaseSensitive} attribute of a {@link Dictionary}.
*
- * @throws IOException
- * @throws InvalidFormatException
+ * @throws IOException Thrown if IO errors occurred.
+ * @throws InvalidFormatException Thrown if parameters were invalid.
*/
public static boolean create(InputStream in, EntryInserter inserter)
throws IOException {
@@ -230,16 +244,17 @@ public class DictionaryEntryPersistor {
/**
* Serializes the given entries to the given {@link OutputStream}.
- *
+ * <p>
+ * <b>Note:</b>
* After the serialization is finished the provided
* {@link OutputStream} remains open.
*
- * @param out stream to serialize to
- * @param entries entries to serialize
+ * @param out The {@link OutputStream} to serialize to.
+ * @param entries The {@link Entry entries} to serialize.
*
- * @throws IOException If an I/O error occurs
- * @deprecated Use
- * {@link DictionaryEntryPersistor#serialize(java.io.OutputStream, java.util.Iterator, boolean)} instead
+ * @throws IOException Thrown if IO errors occurred.
+ * @throws InvalidFormatException Thrown if parameters were invalid.
+ * @deprecated Use {@link DictionaryEntryPersistor#serialize(OutputStream, Iterator, boolean)} instead.
*/
@Deprecated
public static void serialize(OutputStream out, Iterator<Entry> entries)
@@ -249,16 +264,18 @@ public class DictionaryEntryPersistor {
/**
* Serializes the given entries to the given {@link OutputStream}.
- *
+ * <p>
+ * <b>Note:</b>
* After the serialization is finished the provided
* {@link OutputStream} remains open.
*
- * @param out stream to serialize to
- * @param entries entries to serialize
- * @param casesensitive indicates if the written dictionary
- * should be case sensitive or case insensitive.
+ * @param out The {@link OutputStream} to serialize to.
+ * @param entries The {@link Entry entries} to serialize.
+ * @param casesensitive Indicates if the written dictionary should be
+ * case-sensitive, or not.
*
- * @throws IOException If an I/O error occurs
+ * @throws IOException Thrown if IO errors occurred.
+ * @throws InvalidFormatException Thrown if parameters were invalid.
*/
public static void serialize(OutputStream out, Iterator<Entry> entries,
boolean casesensitive) throws IOException {
@@ -270,7 +287,7 @@ public class DictionaryEntryPersistor {
try {
hd = tf.newTransformerHandler();
} catch (TransformerConfigurationException e) {
- throw new AssertionError("The Transformer configuration must be valid!");
+ throw new InvalidFormatException("The Transformer configuration must be valid!");
}
Transformer serializer = hd.getTransformer();
@@ -279,24 +296,20 @@ public class DictionaryEntryPersistor {
hd.setResult(streamResult);
-
try {
hd.startDocument();
AttributesImpl dictionaryAttributes = new AttributesImpl();
-
dictionaryAttributes.addAttribute("", "", ATTRIBUTE_CASE_SENSITIVE,
"", String.valueOf(casesensitive));
hd.startElement("", "", DICTIONARY_ELEMENT, dictionaryAttributes);
while (entries.hasNext()) {
Entry entry = entries.next();
-
serializeEntry(hd, entry);
}
hd.endElement("", "", DICTIONARY_ELEMENT);
-
hd.endDocument();
}
catch (SAXException e) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Entry.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Entry.java
index fbdfd36b..c40f3933 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Entry.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/Entry.java
@@ -23,22 +23,22 @@ import opennlp.tools.util.StringList;
/**
* An {@link Entry} is a {@link StringList} which can
* optionally be mapped to attributes.
- *
- * {@link Entry}s is a read and written by the {@link DictionaryEntryPersistor}.
+ * <p>
+ * {@link Entry entries} are read and written by the {@link DictionaryEntryPersistor}.
*
* @see DictionaryEntryPersistor
* @see Attributes
*/
public class Entry {
- private StringList tokens;
- private Attributes attributes;
+ private final StringList tokens;
+ private final Attributes attributes;
/**
- * Initializes the current instance.
+ * Initializes an {@link Entry}.
*
- * @param tokens
- * @param attributes
+ * @param tokens The tokens to keep.
+ * @param attributes The (optional) {@link Attributes} to set.
*/
public Entry(StringList tokens, Attributes attributes) {
this.tokens = tokens;
@@ -46,18 +46,14 @@ public class Entry {
}
/**
- * Retrieves the tokens.
- *
- * @return the tokens
+ * @return Retrieves the tokens.
*/
public StringList getTokens() {
return tokens;
}
/**
- * Retrieves the {@link Attributes}.
- *
- * @return the {@link Attributes}
+ * @return Retrieves the {@link Attributes}.
*/
public Attributes getAttributes() {
return attributes;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java
index 02d620f3..1e1a93d9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java
@@ -23,9 +23,9 @@ import opennlp.tools.util.InvalidFormatException;
public interface EntryInserter {
/**
- *
- * @param entry
- * @throws InvalidFormatException
+ * @param entry The {@link Entry} to insert.
+ *
+ * @throws InvalidFormatException Thrown if the {@code entry} has an invalid format.
*/
void insert(Entry entry) throws InvalidFormatException;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java
index deb6ab0a..3509abae 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java
@@ -18,12 +18,15 @@
package opennlp.tools.entitylinker;
import java.util.HashMap;
+import java.util.Map;
import java.util.Objects;
/**
- * Stores a minimal tuple of information. Intended to be used with LinkedSpan
- *
+ * Represents a minimal tuple of information.
+ * Intended to be used with {@link LinkedSpan}.
*
+ * @see EntityLinker
+ * @see LinkedSpan
*/
public abstract class BaseLink {
@@ -31,10 +34,7 @@ public abstract class BaseLink {
private String itemID;
private String itemName;
private String itemType;
- private HashMap<String, Double> scoreMap = new HashMap<>();
-
- public BaseLink() {
- }
+ private Map<String, Double> scoreMap = new HashMap<>();
public BaseLink(String itemParentID, String itemID, String itemName, String itemType) {
this.itemParentID = itemParentID;
@@ -43,84 +43,59 @@ public abstract class BaseLink {
this.itemType = itemType;
}
- /**
- * Any parent ID for the linked item
- * @return
- */
public String getItemParentID() {
return itemParentID;
}
/**
- * returns the parent ID of the linked item
- * @param itemParentID
+ * @param itemParentID The parent ID of the linked item
*/
public void setItemParentID(String itemParentID) {
this.itemParentID = itemParentID;
}
- /**
- * returns the itemid
- *
- * @return
- */
public String getItemID() {
return itemID;
}
/**
- * sets the item id. This field can store, for example, the primary key of a
- * row in an external/linked database
- *
- * @param itemID
+ * @param itemID This field can store, for example, the primary key of
+ * a row in an external/linked data source.
*/
public void setItemID(String itemID) {
this.itemID = itemID;
}
- /**
- * returns the name
- *
- * @return
- */
public String getItemName() {
return itemName;
}
/**
- * Sets the item name. An item name can be the native name (often a normalized
- * version of something) from an external linked database
- *
- * @param itemName
+ * @param itemName An item name can be the native name (often a normalized
+ * version of something) from an external linked data source.
*/
public void setItemName(String itemName) {
this.itemName = itemName;
}
- /**
- * returns the type
- *
- * @return
- */
public String getItemType() {
return itemType;
}
/**
- * sets the item type. An item type can be the native type from an external
- * linked database. For instance, a product type or code
*
- * @param itemType
+ * @param itemType An item type can be the native type from an external
+ * linked database. For instance, a product type or code.
*/
public void setItemType(String itemType) {
this.itemType = itemType;
}
- public HashMap<String, Double> getScoreMap() {
+ public Map<String, Double> getScoreMap() {
return scoreMap;
}
- public void setScoreMap(HashMap<String, Double> scoreMap) {
+ public void setScoreMap(Map<String, Double> scoreMap) {
this.scoreMap = scoreMap;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
index c62342a5..2430445a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
@@ -23,72 +23,80 @@ import java.util.List;
import opennlp.tools.util.Span;
/**
- * EntityLinkers establish connections to external data to enrich extracted
- * entities. For instance, for Location entities a linker can be developed to
- * lookup each found location in a geonames gazateer. Another example may be to
+ * EntityLinkers establish connections with external data to enrich extracted
+ * entities.
+ * <p>
+ * For instance, for Location entities a linker can be developed to
+ * look up each found location in a geonames gazetteer. Another example may be to
* find peoples' names and look them up in a database or active directory.
- * Intended to return n best matches for any give search, but can also be
- * implemented as deterministic
+ * Intended to return n best matches for any given search, but can also be
+ * implemented as deterministic.
*
- * @param <T> A type that extends Span. LinkedSpan and BaseLink are provided to
- * provide this signature: EntityLinker<LinkedSpan<BaseLink>> as a
- * default
+ * @param <T> A type that extends {@link Span}. {@link LinkedSpan} and {@link BaseLink}
+ * are available to provide this signature. Use:
+ * {@link EntityLinker}<LinkedSpan<BaseLink>> as a default.
*/
public interface EntityLinker<T extends Span> {
/**
- * allows for passing properties through the EntityLinkerFactory into all
- * impls dynamically. EntityLinker impls should initialize reusable objects
+ * Initializes an {@link EntityLinker} and allows for passing properties
+ * through the {@link EntityLinkerFactory} into all impls dynamically.
+ * <p>
+ * {@link EntityLinker} impls should initialize reusable objects
* used by the impl in this method. If this is done, any errors will be
- * captured and thrown by the EntityLinkerFactory.
+ * captured and thrown by the {@link EntityLinkerFactory}.
*
- * @param initializationData the EntityLinkerProperties object that contains
+ * @param initializationData The {@link EntityLinkerProperties} that contains
* properties needed by the impl, as well as any
- * other objects required for the impl
- * @throws java.io.IOException
+ * other objects required.
+ * @throws IOException Thrown if IO errors occurred.
*/
void init(EntityLinkerProperties initializationData) throws IOException;
/**
- * Links an entire document of named entities to an external source
+ * Links an entire document of named entities to an external source.
*
- * @param doctext the full text of the document
- * @param tokensBySentence a list of tokens spans that correspond to each sentence.
- * The outer array refers to the sentence, the inner
+ * @param doctext The full text of the document.
+ * @param sentences An array of {@link Span sentence spans}.
+ * @param tokensBySentence An array of {@link Span tokens spans} that correspond to
+ * each sentence. The outer array refers to the sentence, the inner
* array is the tokens for the outer sentence. Similar
- * in nature to Map of SentenceIndex keys to Listof
- * tokens as values
- * @param namesBySentence a list of name spans that correspond to each
+ * in nature to Map of SentenceIndex keys to List of
+ * tokens as values.
+ * @param namesBySentence An array of {@link Span name spans} that correspond to each
* sentence. The outer array refers to the sentence,
* the inner array refers to the tokens that for the
- * same sentence.Similar in nature to
+ * same sentence. Similar in nature to
* Map<SentenceIndex,List<Name Spans For This
- * Sentence's Tokens>> @ return
- * @return
+ * Sentence's Tokens>> @ return.
+ *
+ * @return A list of {@code T} instances.
*/
List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence, Span[][] namesBySentence);
/**
- * Links the names that correspond to the tokens[] spans. The sentenceindex
+ * Links the names that correspond to the tokens[] spans. The {@code sentenceIndex}
* can be used to get the sentence text and tokens from the text based on the
* sentence and token spans. The text is available for additional context.
*
- * @param doctext the full text of the document
- * @param tokensBySentence a list of tokens spans that correspond to each sentence.
- * The outer array refers to the sentence, the inner
+ * @param doctext The full text of the document.
+ * @param sentences An array of {@link Span sentence spans}.
+ * @param tokensBySentence An array of {@link Span tokens spans} that correspond to each
+ * sentence. The outer array refers to the sentence, the inner
* array is the tokens for the outer sentence. Similar
- * in nature to Map of SentenceIndex keys to Listof
- * tokens as values
- * @param namesBySentence a list of name spans that correspond to each
+ * in nature to Map of SentenceIndex keys to List of
+ * tokens as values.
+ * @param namesBySentence An array of {@link Span name spans} that correspond to each
* sentence. The outer array refers to the sentence,
* the inner array refers to the tokens that for the
- * same sentence.Similar in nature to
+ * same sentence. Similar in nature to
* Map<SentenceIndex,List<Name Spans For This
- * Sentence's Tokens>> @ return
- * @param sentenceIndex the index to the sentence span that the tokens[]
- * Span[] corresponds to
- * @return
+ * Sentence's Tokens>> @ return.
+ * @param sentenceIndex The index to the sentence span that the {@code tokensBySentence}
+ * corresponds to.
+ *
+ * @return A list of {@code T} instances.
*/
List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence,
Span[][] namesBySentence, int sentenceIndex);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
index 8f07f8ee..244f046d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java
@@ -23,23 +23,30 @@ import java.util.Objects;
import opennlp.tools.util.ext.ExtensionLoader;
/**
- * Generates an EntityLinker implementation via properties file configuration
- *
+ * Generates {@link EntityLinker} instances via a {@code properties} file configuration.
+ * <p>
+ * In the properties file, the linker implementation must be
+ * provided using {@code "linker"} as the properties key, and the
+ * full class name as value.
*/
public class EntityLinkerFactory {
/**
- *
+ * Retrieves an {@link EntityLinker} instance matching the {@code properties} configuration.
*
* @param entityType The type of entity being linked to. This value is used to
- * retrieve the implementation of the entitylinker from the
- * entitylinker properties file.
- * @param properties An object that extends EntityLinkerProperties. This
- * object will be passed into the implemented EntityLinker
- * init(..) method, so it is an appropriate place to put
- * additional resources.
- * @return an EntityLinker impl
- * @throws java.io.IOException
+ * retrieve the implementation of the {@link EntityLinker} from the
+ * {@link EntityLinker} properties file.
+ * Must not be {@code null}.
+ * @param properties An object that extends {@link EntityLinkerProperties}.
+ * This object will be passed into the
+ * {@link EntityLinker#init(EntityLinkerProperties)} method,
+ * so it is an appropriate place to put additional resources.
+ * Must not be {@code null}.
+ *
+ * @return The {@link EntityLinker} instance for the {@code properties} configuration.
+ * @throws IOException Thrown if IO errors occurred.
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public static synchronized EntityLinker<?> getLinker(String entityType, EntityLinkerProperties properties)
throws IOException {
@@ -59,17 +66,17 @@ public class EntityLinkerFactory {
}
/**
+ * Retrieves an {@link EntityLinker} instance matching the {@code properties} configuration.
*
+ * @param properties An object that extends {@link EntityLinkerProperties}.
+ * This object will be passed into the
+ * {@link EntityLinker#init(EntityLinkerProperties)} method,
+ * so it is an appropriate place to put additional resources.
+ * Must not be {@code null}.
*
- *
- * @param properties An object that extends EntityLinkerProperties. This
- * object will be passed into the implemented EntityLinker
- * init(..) method, so it is an appropriate place to put
- * additional resources. In the properties file, the linker implementation must be
- * provided using "linker" as the properties key, and the
- * full class name as value
- * @return an EntityLinker impl
- * @throws java.io.IOException
+ * @return The {@link EntityLinker} instance for the {@code properties} configuration.
+ * @throws IOException Thrown if IO errors occurred.
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public static synchronized EntityLinker<?> getLinker(EntityLinkerProperties properties) throws IOException {
Objects.requireNonNull(properties, "properties argument must not be null");
diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
index f57325da..026efaf0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java
@@ -24,18 +24,21 @@ import java.io.InputStream;
import java.util.Properties;
/**
- * Properties wrapper for the EntityLinker framework
+ * Properties wrapper for {@link EntityLinker} implementations.
*
+ * @see EntityLinkerFactory
*/
public class EntityLinkerProperties {
private Properties props;
/**
- * Constructor takes location of properties file as arg
+ * Initializes {@link EntityLinkerProperties} via a {@link File} reference.
*
- * @param propertiesfile the properties file
- * @throws IOException
+ * @param propertiesfile The {@link File} that references the {@code *.properties}
+ * configuration.
+ *
+ * @throws IOException Thrown if IO errors occurred.
*/
public EntityLinkerProperties(File propertiesfile) throws IOException {
try (InputStream stream = new FileInputStream(propertiesfile)) {
@@ -44,11 +47,12 @@ public class EntityLinkerProperties {
}
/**
+ * Initializes {@link EntityLinkerProperties} via an {@link InputStream} reference.
*
- * @param propertiesIn inputstream of properties file. Stream will not be
- * closed
- * @throws IOException
+ * @param propertiesIn The {@link InputStream} that references the {@code *.properties}
+ * configuration.
*
+ * @throws IOException Thrown if IO errors occurred.
*/
public EntityLinkerProperties(InputStream propertiesIn) throws IOException {
init(propertiesIn);
@@ -60,15 +64,15 @@ public class EntityLinkerProperties {
}
/**
- * Gets a property from the props file.
+ * Retrieves a property value for a given {@code key}.
*
- * @param key the key to the desired item in the properties file
- * (key=value)
- * @param defaultValue a default value in case the key, or the value are
+ * @param key The key to the desired item in the properties configuration
+ * ({@code key=value}).
+ * @param defaultValue A default value in case the {@code key} or its value is
* missing
- * @return a property value in the form of a string
+ * @return A property value as a {@link String}.
- * @throws IOException when the properties object was somehow not initialized properly
+ * @throws IOException Thrown if the properties object was not initialized properly.
*/
public String getProperty(String key, String defaultValue) throws IOException {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java
index f735c3de..8836f2a5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java
@@ -22,9 +22,9 @@ import java.util.Objects;
import opennlp.tools.util.Span;
/**
- * A "default" extended span that holds additional information about the Span
+ * A default, extended {@link Span} that holds additional information about a {@link Span}.
*
- * @param <T>
+ * @param <T> The generic type that specializes a {@link BaseLink}.
*/
public class LinkedSpan<T extends BaseLink> extends Span {
@@ -32,79 +32,124 @@ public class LinkedSpan<T extends BaseLink> extends Span {
private int sentenceid = 0;
private String searchTerm;
+ /**
+ * Initializes a new {@link LinkedSpan}. Sets the probability to {@code 0} by default.
+ *
+ * @param linkedEntries The {@code n} best linked entries from
+ * an external data source.
+ * @param s The start position of a {@link Span}.
+ * Must be equal to or greater than {@code 0}.
+ * Must not be greater than {@code e}.
+ * @param e The end position of a {@link Span}, which is {@code +1}
+ * more than the last element in the span.
+ * Must be equal to or greater than {@code 0}.
+ * @param type The type of the span.
+ *
+ * @throws IllegalArgumentException Thrown if given parameters are invalid.
+ */
public LinkedSpan(ArrayList<T> linkedEntries, int s, int e, String type) {
- super(s, e, type);
- this.linkedEntries = linkedEntries;
+ this(linkedEntries, s, e, type, 0d);
}
+ /**
+ * Initializes a new {@link LinkedSpan}.
+ *
+ * @param linkedEntries The {@code n} best linked entries from
+ * an external data source.
+ * @param s The start position of a {@link Span}.
+ * Must be equal to or greater than {@code 0}.
+ * Must not be greater than {@code e}.
+ * @param e The end position of a {@link Span}, which is {@code +1}
+ * more than the last element in the span.
+ * Must be equal to or greater than {@code 0}.
+ * @param type The type of the span.
+ * @param prob The probability of the {@link Span}.
+ *
+ * @throws IllegalArgumentException Thrown if given parameters are invalid.
+ */
public LinkedSpan(ArrayList<T> linkedEntries, int s, int e, String type, double prob) {
super(s, e, type, prob);
this.linkedEntries = linkedEntries;
}
+ /**
+ * Initializes a new {@link LinkedSpan}.
+ *
+ * @param linkedEntries The {@code n} best linked entries from
+ * an external data source.
+ * @param s The start position of a {@link Span}.
+ * Must be equal to or greater than {@code 0}.
+ * Must not be greater than {@code e}.
+ * @param e The end position of a {@link Span}, which is {@code +1}
+ * more than the last element in the span.
+ * Must be equal to or greater than {@code 0}.
+ *
+ * @throws IllegalArgumentException Thrown if given parameters are invalid.
+ */
public LinkedSpan(ArrayList<T> linkedEntries, int s, int e) {
super(s, e);
this.linkedEntries = linkedEntries;
}
+ /**
+ * Initializes a new {@link LinkedSpan} via an existing {@link Span}
+ * which is shifted by the specified {@code offset}.
+ *
+ * @param linkedEntries The {@code n} best linked entries from
+ * an external data source.
+ * @param span The existing {@link Span}.
+ * @param offset The positive or negative shift offset.
+ *
+ * @throws IllegalArgumentException Thrown if given parameters are invalid.
+ */
public LinkedSpan(ArrayList<T> linkedEntries, Span span, int offset) {
super(span, offset);
this.linkedEntries = linkedEntries;
}
/**
- * Returns the n best linked entries from an external data source. For
- * instance, this will hold gazateer entries for a search into a geonames
- * gazateer
- *
- * @return
+ * @return The {@code n} best linked entries from an external data source.
+ * For instance, this will hold gazetteer entries for a search into a geonames
+ * gazetteer.
*/
public ArrayList<T> getLinkedEntries() {
return linkedEntries;
}
/**
- * Sets the n best linked entries from an external data source. For instance,
- * this will hold gazateer entries for a search into a geonames gazateer
- *
+ * @param linkedEntries The {@code n} best linked entries from an external data source.
+ * For instance, this will hold gazetteer entries for a search
+ * into a geonames gazetteer.
*/
public void setLinkedEntries(ArrayList<T> linkedEntries) {
this.linkedEntries = linkedEntries;
}
/**
- * Returns the id or index of the sentence from which this span was extracted
- *
- * @return
+ * @return The id or index of the sentence from which this span was extracted.
*/
public int getSentenceid() {
return sentenceid;
}
/**
- * sets the id or index of the sentence from which this span was extracted
- *
- * @param sentenceid
+ * @param sentenceid The id or index of the sentence from which this span was extracted.
*/
public void setSentenceid(int sentenceid) {
this.sentenceid = sentenceid;
}
/**
- * Returns the search term that was used to link this span to an external data
- * source
- *
- * @return searchTerm
+ * @return The search term that was used to link this span to an external data
+ * source.
*/
public String getSearchTerm() {
return searchTerm;
}
/**
- * sets the search term that is used to link this span to an external data
- * source
- *
- * @param searchTerm
+ * @param searchTerm The search term that is used to link this span to an external data
+ * source.
*/
public void setSearchTerm(String searchTerm) {
this.searchTerm = searchTerm;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/package-info.java
similarity index 75%
copy from opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java
copy to opennlp-tools/src/main/java/opennlp/tools/entitylinker/package-info.java
index 02d620f3..b30a028b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/EntryInserter.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/package-info.java
@@ -15,17 +15,7 @@
* limitations under the License.
*/
-
-package opennlp.tools.dictionary.serializer;
-
-import opennlp.tools.util.InvalidFormatException;
-
-public interface EntryInserter {
-
- /**
- *
- * @param entry
- * @throws InvalidFormatException
- */
- void insert(Entry entry) throws InvalidFormatException;
-}
+/**
+ * Package related to linking entities to external data sources.
+ */
+package opennlp.tools.entitylinker;