You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2014/08/22 10:02:16 UTC

svn commit: r1619699 - /lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java

Author: tommaso
Date: Fri Aug 22 08:02:15 2014
New Revision: 1619699

URL: http://svn.apache.org/r1619699
Log:
LUCENE-5699 - fixed javadoc

Modified:
    lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java

Modified: lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java?rev=1619699&r1=1619698&r2=1619699&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (original)
+++ lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java Fri Aug 22 08:02:15 2014
@@ -47,11 +47,34 @@ import org.apache.lucene.util.BytesRef;
  */
 public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
 
+  /**
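+   * {@link org.apache.lucene.index.AtomicReader} used to access the index (e.g. to read the terms of the class field)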
+   */
   protected AtomicReader atomicReader;
+
+  /**
+   * names of the fields to be used as input text
+   */
   protected String[] textFieldNames;
+
+  /**
+   * name of the field to be used as a class / category output
+   */
   protected String classFieldName;
+
+  /**
+   * {@link org.apache.lucene.analysis.Analyzer} to be used for tokenizing unseen input text
+   */
   protected Analyzer analyzer;
+
+  /**
+   * {@link org.apache.lucene.search.IndexSearcher} to run searches on the index for retrieving frequencies
+   */
   protected IndexSearcher indexSearcher;
+
+  /**
+   * {@link org.apache.lucene.search.Query} optionally used to filter the set of documents used for classification
+   */
   protected Query query;
 
   /**
@@ -172,6 +195,12 @@ public class SimpleNaiveBayesClassifier 
     return returnList;
   }
 
+  /**
+   * count the number of documents in the index having at least one value for the 'class' field
+   *
+   * @return the number of documents having a value for the 'class' field
+   * @throws IOException if an I/O error occurs while reading the index
+   */
   protected int countDocsWithClass() throws IOException {
     int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
     if (docCount == -1) { // in case codec doesn't support getDocCount
@@ -188,6 +217,13 @@ public class SimpleNaiveBayesClassifier 
     return docCount;
   }
 
+  /**
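+   * tokenize a <code>String</code> using this classifier's text fields and analyzer
+   *
+   * @param doc the <code>String</code> representing an input text (to be classified)
+   * @return a <code>String</code> array containing the resulting tokens
+   * @throws IOException if an I/O error occurs during tokenization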
+   */
   protected String[] tokenizeDoc(String doc) throws IOException {
     Collection<String> result = new LinkedList<>();
     for (String textFieldName : textFieldNames) {