You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by to...@apache.org on 2016/01/17 07:33:28 UTC
svn commit: r1725068 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat: DoccatModel.java DocumentCategorizer.java

Author: tommaso
Date: Sun Jan 17 06:33:28 2016
New Revision: 1725068

URL: http://svn.apache.org/viewvc?rev=1725068&view=rev
Log:
OPENNLP-829 - added javadoc to DocumentCategorizer and DoccatModel

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java?rev=1725068&r1=1725067&r2=1725068&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java Sun Jan 17 06:33:28 2016
@@ -29,6 +29,9 @@ import opennlp.tools.util.BaseToolFactor
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.BaseModel;
 
+/**
+ * A model for document categorization
+ */
 public class DoccatModel extends BaseModel {
 
   private static final String COMPONENT_NAME = "DocumentCategorizerME";

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java?rev=1725068&r1=1725067&r2=1725068&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java Sun Jan 17 06:33:28 2016
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.doccat;
 
 import java.util.Map;
@@ -28,31 +26,94 @@ import java.util.SortedMap;
 public interface DocumentCategorizer {
 
   /**
-   * Categorizes the given text.
+   * Categorizes the given text, provided in separate tokens.
    *
-   * @param text
+   * @param text the tokens of text to categorize
+   * @return per category probabilities
    */
-  public double[] categorize(String text[]);
+  double[] categorize(String text[]);
 
-  public double[] categorize(String text[], Map<String, Object> extraInformation);
+  /**
+   * Categorizes the given text, provided in separate tokens.
+   *
+   * @param text             the tokens of text to categorize
+   * @param extraInformation optional extra information to pass for evaluation
+   * @return per category probabilities
+   */
+  double[] categorize(String text[], Map<String, Object> extraInformation);
 
-  public String getBestCategory(double[] outcome);
+  /**
+   * get the best category from previously generated outcome probabilities
+   *
+   * @param outcome a vector of outcome probabilities
+   * @return the best category String
+   */
+  String getBestCategory(double[] outcome);
 
-  public int getIndex(String category);
+  /**
+   * get the index of a certain category
+   *
+   * @param category the category
+   * @return an index
+   */
+  int getIndex(String category);
 
-  public String getCategory(int index);
+  /**
+   * get the category at a given index
+   *
+   * @param index the index
+   * @return a category
+   */
+  String getCategory(int index);
 
-  public int getNumberOfCategories();
+  /**
+   * get the number of categories
+   *
+   * @return the number of categories
+   */
+  int getNumberOfCategories();
 
-  public double[] categorize(String documentText);
+  /**
+   * categorize a piece of text
+   *
+   * @param documentText the text to categorize
+   * @return the probabilities of each category (sum up to 1)
+   */
+  double[] categorize(String documentText);
 
-  public double[] categorize(String documentText, Map<String, Object> extraInformation);
+  /**
+   * categorize a piece of text, providing extra metadata.
+   *
+   * @param documentText     the text to categorize
+   * @param extraInformation extra metadata
+   * @return the probabilities of each category (sum up to 1)
+   */
+  double[] categorize(String documentText, Map<String, Object> extraInformation);
 
-  public String getAllResults(double results[]);
+  /**
+   * get the name of the category associated with the given probabilities
+   *
+   * @param results the probabilities of each category
+   * @return the name of the outcome
+   */
+  String getAllResults(double results[]);
 
-  public Map<String, Double> scoreMap(String text);
+  /**
+   * Returns a map in which the key is the category name and the value is the score
+   *
+   * @param text the input text to classify
+   * @return a map with the category name as a key. The value is the score of that category.
+   */
+  Map<String, Double> scoreMap(String text);
 
-  public SortedMap<Double, Set<String>> sortedScoreMap(String text);
+  /**
+   * Get a map of the scores sorted in ascending order together with their associated categories.
+   * Many categories can have the same score, hence the Set as value
+   *
+   * @param text the input text to classify
+   * @return a map with the score as a key. The value is a Set of categories with the score.
+   */
+  SortedMap<Double, Set<String>> sortedScoreMap(String text);
 
 }