You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/01/26 05:59:11 UTC
[3/3] lucenenet git commit: Lucene.Net.Classification: documentation
comments
Lucene.Net.Classification: documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/726c43f0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/726c43f0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/726c43f0
Branch: refs/heads/api-work
Commit: 726c43f04fad32147f803e54a7afa25fc9bd10a2
Parents: 70f2e79
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Jan 26 12:58:39 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Jan 26 12:58:39 2017 +0700
----------------------------------------------------------------------
.../ClassificationResult.cs | 11 +++---
src/Lucene.Net.Classification/IClassifier.cs | 10 ++---
.../KNearestNeighborClassifier.cs | 39 +++++++++++++++----
.../SimpleNaiveBayesClassifier.cs | 29 ++++++++++++++
.../Utils/DatasetSplitter.cs | 40 ++++++++++----------
5 files changed, 89 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/726c43f0/src/Lucene.Net.Classification/ClassificationResult.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Classification/ClassificationResult.cs b/src/Lucene.Net.Classification/ClassificationResult.cs
index 84984a3..9683e83 100644
--- a/src/Lucene.Net.Classification/ClassificationResult.cs
+++ b/src/Lucene.Net.Classification/ClassificationResult.cs
@@ -18,7 +18,7 @@ namespace Lucene.Net.Classification
*/
/// <summary>
- /// The result of a call to {@link Classifier#assignClass(String)} holding an assigned class of type <code>T</code> and a score.
+ /// The result of a call to <see cref="IClassifier{T}.AssignClass(string)"/> holding an assigned class of type <typeparamref name="T"/> and a score.
/// @lucene.experimental
/// </summary>
public class ClassificationResult<T>
@@ -29,8 +29,8 @@ namespace Lucene.Net.Classification
/// <summary>
/// Constructor
- /// <param name="assignedClass">assignedClass the class <code>T</code> assigned by a {@link Classifier}</param>
- /// <param name="score">score the score for the assignedClass as a <code>double</code></param>
+ /// <param name="assignedClass">the class <typeparamref name="T"/> assigned by a <see cref="IClassifier{T}"/></param>
+ /// <param name="score">the score for the <paramref name="assignedClass"/> as a <see cref="double"/></param>
/// </summary>
public ClassificationResult(T assignedClass, double score)
{
@@ -40,7 +40,7 @@ namespace Lucene.Net.Classification
/// <summary>
/// retrieve the result class
- /// @return a <code>T</code> representing an assigned class
+ /// Gets a <typeparamref name="T"/> representing an assigned class.
/// </summary>
public virtual T AssignedClass
{
@@ -51,8 +51,7 @@ namespace Lucene.Net.Classification
}
/// <summary>
- /// retrieve the result score
- /// @return a <code>double</code> representing a result score
+ /// Gets a <see cref="double"/> representing a result score.
/// </summary>
public virtual double Score
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/726c43f0/src/Lucene.Net.Classification/IClassifier.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Classification/IClassifier.cs b/src/Lucene.Net.Classification/IClassifier.cs
index bd3e2ca..925db20 100644
--- a/src/Lucene.Net.Classification/IClassifier.cs
+++ b/src/Lucene.Net.Classification/IClassifier.cs
@@ -23,20 +23,20 @@ namespace Lucene.Net.Classification
/// <summary>
/// A classifier, see <code>http://en.wikipedia.org/wiki/Classifier_(mathematics)</code>, which assign classes of type
- /// <code>T</code>
+ /// <typeparamref name="T"/>
/// @lucene.experimental
/// </summary>
public interface IClassifier<T>
{
/// <summary>
- /// Assign a class (with score) to the given text String
+ /// Assign a class (with score) to the given text string
/// </summary>
- /// <param name="text">a String containing text to be classified</param>
- /// <returns>a {ClassificationResult} holding assigned class of type <code>T</code> and score</returns>
+ /// <param name="text">a string containing text to be classified</param>
+ /// <returns>a <see cref="ClassificationResult{T}"/> holding assigned class of type <typeparamref name="T"/> and score</returns>
ClassificationResult<T> AssignClass(string text);
/// <summary>
- /// * Train the classifier using the underlying Lucene index
+ /// Train the classifier using the underlying Lucene index
/// </summary>
/// <param name="analyzer"> the analyzer used to tokenize / filter the unseen text</param>
/// <param name="atomicReader">the reader to use to access the Lucene index</param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/726c43f0/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs b/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
index 61d1750..5122462 100644
--- a/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
+++ b/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
@@ -27,7 +27,7 @@ namespace Lucene.Net.Classification
/// <summary>
/// A k-Nearest Neighbor classifier (see <code>http://en.wikipedia.org/wiki/K-nearest_neighbors</code>) based
- /// on {@link MoreLikeThis}
+ /// on <see cref="MoreLikeThis"/>
///
/// @lucene.experimental
/// </summary>
@@ -44,17 +44,17 @@ namespace Lucene.Net.Classification
private int _minDocsFreq;
private int _minTermFreq;
- /// <summary>Create a {@link Classifier} using kNN algorithm</summary>
- /// <param name="k">the number of neighbors to analyze as an <code>int</code></param>
+ /// <summary>Create a <see cref="IClassifier{T}"/> using kNN algorithm</summary>
+ /// <param name="k">the number of neighbors to analyze as an <see cref="int"/></param>
public KNearestNeighborClassifier(int k)
{
_k = k;
}
- /// <summary>Create a {@link Classifier} using kNN algorithm</summary>
- /// <param name="k">the number of neighbors to analyze as an <code>int</code></param>
- /// <param name="minDocsFreq">the minimum number of docs frequency for MLT to be set with {@link MoreLikeThis#setMinDocFreq(int)}</param>
- /// <param name="minTermFreq">the minimum number of term frequency for MLT to be set with {@link MoreLikeThis#setMinTermFreq(int)}</param>
+ /// <summary>Create a <see cref="IClassifier{T}"/> using kNN algorithm</summary>
+ /// <param name="k">the number of neighbors to analyze as an <see cref="int"/></param>
+ /// <param name="minDocsFreq">the minimum document frequency for MLT, to be set with <see cref="MoreLikeThis.MinDocFreq"/></param>
+ /// <param name="minTermFreq">the minimum term frequency for MLT, to be set with <see cref="MoreLikeThis.MinTermFreq"/></param>
public KNearestNeighborClassifier(int k, int minDocsFreq, int minTermFreq)
{
_k = k;
@@ -62,6 +62,11 @@ namespace Lucene.Net.Classification
_minTermFreq = minTermFreq;
}
+ /// <summary>
+ /// Assign a class (with score) to the given text string
+ /// </summary>
+ /// <param name="text">a string containing text to be classified</param>
+ /// <returns>a <see cref="ClassificationResult{BytesRef}"/> holding assigned class of type <see cref="BytesRef"/> and score</returns>
public virtual ClassificationResult<BytesRef> AssignClass(string text)
{
if (_mlt == null)
@@ -116,17 +121,35 @@ namespace Lucene.Net.Classification
return new ClassificationResult<BytesRef>(assignedClass, score);
}
+ /// <summary>
+ /// Train the classifier using the underlying Lucene index
+ /// </summary>
+ /// <param name="analyzer">the analyzer used to tokenize / filter the unseen text</param>
+ /// <param name="atomicReader">the reader to use to access the Lucene index</param>
+ /// <param name="classFieldName">the name of the field containing the class assigned to documents</param>
+ /// <param name="textFieldName">the name of the field used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string textFieldName, string classFieldName, Analyzer analyzer)
{
Train(atomicReader, textFieldName, classFieldName, analyzer, null);
}
-
+ /// <summary>Train the classifier using the underlying Lucene index</summary>
+ /// <param name="analyzer">the analyzer used to tokenize / filter the unseen text</param>
+ /// <param name="atomicReader">the reader to use to access the Lucene index</param>
+ /// <param name="classFieldName">the name of the field containing the class assigned to documents</param>
+ /// <param name="query">the query to filter which documents to use for training</param>
+ /// <param name="textFieldName">the name of the field used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string textFieldName, string classFieldName, Analyzer analyzer, Query query)
{
Train(atomicReader, new string[]{textFieldName}, classFieldName, analyzer, query);
}
+ /// <summary>Train the classifier using the underlying Lucene index</summary>
+ /// <param name="analyzer">the analyzer used to tokenize / filter the unseen text</param>
+ /// <param name="atomicReader">the reader to use to access the Lucene index</param>
+ /// <param name="classFieldName">the name of the field containing the class assigned to documents</param>
+ /// <param name="query">the query to filter which documents to use for training</param>
+ /// <param name="textFieldNames">the names of the fields to be used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string[] textFieldNames, string classFieldName, Analyzer analyzer, Query query)
{
_textFieldNames = textFieldNames;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/726c43f0/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs b/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
index c25aab9..7172e23 100644
--- a/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
+++ b/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
@@ -41,20 +41,44 @@ namespace Lucene.Net.Classification
private IndexSearcher _indexSearcher;
private Query _query;
+ /// <summary>
+ /// Creates a new NaiveBayes classifier.
+ /// Note that you must call <see cref="Train(AtomicReader, string, string, Analyzer)"/> before you can
+ /// classify any documents.
+ /// </summary>
public SimpleNaiveBayesClassifier()
{
}
+ /// <summary>
+ /// Train the classifier using the underlying Lucene index
+ /// </summary>
+ /// <param name="analyzer">the analyzer used to tokenize / filter the unseen text</param>
+ /// <param name="atomicReader">the reader to use to access the Lucene index</param>
+ /// <param name="classFieldName">the name of the field containing the class assigned to documents</param>
+ /// <param name="textFieldName">the name of the field used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string textFieldName, string classFieldName, Analyzer analyzer)
{
Train(atomicReader, textFieldName, classFieldName, analyzer, null);
}
+ /// <summary>Train the classifier using the underlying Lucene index</summary>
+ /// <param name="analyzer">the analyzer used to tokenize / filter the unseen text</param>
+ /// <param name="atomicReader">the reader to use to access the Lucene index</param>
+ /// <param name="classFieldName">the name of the field containing the class assigned to documents</param>
+ /// <param name="query">the query to filter which documents to use for training</param>
+ /// <param name="textFieldName">the name of the field used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string textFieldName, string classFieldName, Analyzer analyzer, Query query)
{
Train(atomicReader, new string[]{textFieldName}, classFieldName, analyzer, query);
}
+ /// <summary>Train the classifier using the underlying Lucene index</summary>
+ /// <param name="analyzer">the analyzer used to tokenize / filter the unseen text</param>
+ /// <param name="atomicReader">the reader to use to access the Lucene index</param>
+ /// <param name="classFieldName">the name of the field containing the class assigned to documents</param>
+ /// <param name="query">the query to filter which documents to use for training</param>
+ /// <param name="textFieldNames">the names of the fields to be used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string[] textFieldNames, string classFieldName, Analyzer analyzer, Query query)
{
_atomicReader = atomicReader;
@@ -109,6 +133,11 @@ namespace Lucene.Net.Classification
return ret;
}
+ /// <summary>
+ /// Assign a class (with score) to the given text string
+ /// </summary>
+ /// <param name="inputDocument">a string containing text to be classified</param>
+ /// <returns>a <see cref="ClassificationResult{BytesRef}"/> holding assigned class of type <see cref="BytesRef"/> and score</returns>
public virtual ClassificationResult<BytesRef> AssignClass(string inputDocument)
{
if (_atomicReader == null)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/726c43f0/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs b/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
index 1d927a9..96af827 100644
--- a/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
+++ b/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
@@ -25,38 +25,36 @@ namespace Lucene.Net.Classification.Utils
* limitations under the License.
*/
- /**
- * Utility class for creating training / test / cross validation indexes from the original index.
- */
+ /// <summary>
+ /// Utility class for creating training / test / cross validation indexes from the original index.
+ /// </summary>
public class DatasetSplitter
{
private readonly double _crossValidationRatio;
private readonly double _testRatio;
- /**
- * Create a {@link DatasetSplitter} by giving test and cross validation IDXs sizes
- *
- * @param testRatio the ratio of the original index to be used for the test IDX as a <code>double</code> between 0.0 and 1.0
- * @param crossValidationRatio the ratio of the original index to be used for the c.v. IDX as a <code>double</code> between 0.0 and 1.0
- */
+ /// <summary>
+ /// Create a <see cref="DatasetSplitter"/> by giving test and cross validation IDXs sizes
+ /// </summary>
+ /// <param name="testRatio">the ratio of the original index to be used for the test IDX as a <see cref="double"/> between 0.0 and 1.0</param>
+ /// <param name="crossValidationRatio">the ratio of the original index to be used for the c.v. IDX as a <see cref="double"/> between 0.0 and 1.0</param>
public DatasetSplitter(double testRatio, double crossValidationRatio)
{
this._crossValidationRatio = crossValidationRatio;
this._testRatio = testRatio;
}
- /**
- * Split a given index into 3 indexes for training, test and cross validation tasks respectively
- *
- * @param originalIndex an {@link AtomicReader} on the source index
- * @param trainingIndex a {@link Directory} used to write the training index
- * @param testIndex a {@link Directory} used to write the test index
- * @param crossValidationIndex a {@link Directory} used to write the cross validation index
- * @param analyzer {@link Analyzer} used to create the new docs
- * @param fieldNames names of fields that need to be put in the new indexes or <code>null</code> if all should be used
- * @throws IOException if any writing operation fails on any of the indexes
- */
+ /// <summary>
+ /// Split a given index into 3 indexes for training, test and cross validation tasks respectively
+ /// </summary>
+ /// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param>
+ /// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param>
+ /// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param>
+ /// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross validation index</param>
+ /// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param>
+ /// <param name="fieldNames">names of fields that need to be put in the new indexes or <c>null</c> if all should be used</param>
+ /// <exception cref="IOException">if any writing operation fails on any of the indexes</exception>
public virtual void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
{
#pragma warning disable 612, 618
@@ -88,7 +86,7 @@ namespace Lucene.Net.Classification.Utils
Document doc = new Document();
if (fieldNames != null && fieldNames.Length > 0)
{
- foreach (String fieldName in fieldNames)
+ foreach (string fieldName in fieldNames)
{
doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).ToString(), ft));
}