You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2021/02/15 21:51:05 UTC
[lucenenet] branch master updated: Lucene.Net.Classification: Removed leading underscore from member vars in several classes (#422)
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new 84a1329 Lucene.Net.Classification: Removed leading underscore from member vars in several classes (#422)
84a1329 is described below
commit 84a132991e3333e2b025ca33da3847d27c144244
Author: Ron Clabo <rc...@users.noreply.github.com>
AuthorDate: Mon Feb 15 16:51:00 2021 -0500
Lucene.Net.Classification: Removed leading underscore from member vars in several classes (#422)
---
.../ClassificationResult.cs | 14 ++---
.../KNearestNeighborClassifier.cs | 66 ++++++++++----------
.../SimpleNaiveBayesClassifier.cs | 70 +++++++++++-----------
.../Utils/DatasetSplitter.cs | 14 ++---
4 files changed, 82 insertions(+), 82 deletions(-)
diff --git a/src/Lucene.Net.Classification/ClassificationResult.cs b/src/Lucene.Net.Classification/ClassificationResult.cs
index 8744efb..e90f178 100644
--- a/src/Lucene.Net.Classification/ClassificationResult.cs
+++ b/src/Lucene.Net.Classification/ClassificationResult.cs
@@ -1,4 +1,4 @@
-namespace Lucene.Net.Classification
+namespace Lucene.Net.Classification
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,8 +24,8 @@ namespace Lucene.Net.Classification
public class ClassificationResult<T>
{
- private readonly T _assignedClass;
- private readonly double _score;
+ private readonly T assignedClass;
+ private readonly double score;
/// <summary>
/// Constructor
@@ -34,19 +34,19 @@ namespace Lucene.Net.Classification
/// </summary>
public ClassificationResult(T assignedClass, double score)
{
- _assignedClass = assignedClass;
- _score = score;
+ this.assignedClass = assignedClass;
+ this.score = score;
}
/// <summary>
/// retrieve the result class
/// @return a <typeparamref name="T"/> representing an assigned class
/// </summary>
- public virtual T AssignedClass => _assignedClass;
+ public virtual T AssignedClass => assignedClass;
/// <summary>
/// Gets a <see cref="double"/> representing a result score.
/// </summary>
- public virtual double Score => _score;
+ public virtual double Score => score;
}
}
\ No newline at end of file
diff --git a/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs b/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
index 9f198e5..b6970bd 100644
--- a/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
+++ b/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis;
+using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Queries.Mlt;
using Lucene.Net.Search;
@@ -34,21 +34,21 @@ namespace Lucene.Net.Classification
public class KNearestNeighborClassifier : IClassifier<BytesRef>
{
- private MoreLikeThis _mlt;
- private string[] _textFieldNames;
- private string _classFieldName;
- private IndexSearcher _indexSearcher;
- private readonly int _k;
- private Query _query;
+ private MoreLikeThis mlt;
+ private string[] textFieldNames;
+ private string classFieldName;
+ private IndexSearcher indexSearcher;
+ private readonly int k;
+ private Query query;
- private readonly int _minDocsFreq; // LUCENENET: marked readonly
- private readonly int _minTermFreq; // LUCENENET: marked readonly
+ private readonly int minDocsFreq; // LUCENENET: marked readonly
+ private readonly int minTermFreq; // LUCENENET: marked readonly
/// <summary>Create a <see cref="IClassifier{T}"/> using kNN algorithm</summary>
/// <param name="k">the number of neighbors to analyze as an <see cref="int"/></param>
public KNearestNeighborClassifier(int k)
{
- _k = k;
+ this.k = k;
}
/// <summary>Create a <see cref="IClassifier{T}"/> using kNN algorithm</summary>
@@ -57,9 +57,9 @@ namespace Lucene.Net.Classification
/// <param name="minTermFreq">the minimum number of term frequency for MLT to be set with <see cref="MoreLikeThis.MinTermFreq"/></param>
public KNearestNeighborClassifier(int k, int minDocsFreq, int minTermFreq)
{
- _k = k;
- _minDocsFreq = minDocsFreq;
- _minTermFreq = minTermFreq;
+ this.k = k;
+ this.minDocsFreq = minDocsFreq;
+ this.minTermFreq = minTermFreq;
}
/// <summary>
@@ -69,23 +69,23 @@ namespace Lucene.Net.Classification
/// <returns>a <see cref="ClassificationResult{BytesRef}"/> holding assigned class of type <see cref="BytesRef"/> and score</returns>
public virtual ClassificationResult<BytesRef> AssignClass(string text)
{
- if (_mlt == null)
+ if (mlt == null)
{
throw new IOException("You must first call Classifier#train");
}
BooleanQuery mltQuery = new BooleanQuery();
- foreach (string textFieldName in _textFieldNames)
+ foreach (string textFieldName in textFieldNames)
{
- mltQuery.Add(new BooleanClause(_mlt.Like(new StringReader(text), textFieldName), Occur.SHOULD));
+ mltQuery.Add(new BooleanClause(mlt.Like(new StringReader(text), textFieldName), Occur.SHOULD));
}
- Query classFieldQuery = new WildcardQuery(new Term(_classFieldName, "*"));
+ Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
mltQuery.Add(new BooleanClause(classFieldQuery, Occur.MUST));
- if (_query != null)
+ if (query != null)
{
- mltQuery.Add(_query, Occur.MUST);
+ mltQuery.Add(query, Occur.MUST);
}
- TopDocs topDocs = _indexSearcher.Search(mltQuery, _k);
+ TopDocs topDocs = indexSearcher.Search(mltQuery, k);
return SelectClassFromNeighbors(topDocs);
}
@@ -96,7 +96,7 @@ namespace Lucene.Net.Classification
foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
{
- BytesRef cl = new BytesRef(_indexSearcher.Doc(scoreDoc.Doc).GetField(_classFieldName).GetStringValue());
+ BytesRef cl = new BytesRef(indexSearcher.Doc(scoreDoc.Doc).GetField(classFieldName).GetStringValue());
if (classCounts.TryGetValue(cl, out int value))
{
classCounts[cl] = value + 1;
@@ -117,7 +117,7 @@ namespace Lucene.Net.Classification
assignedClass = (BytesRef)entry.Key.Clone();
}
}
- double score = max / (double) _k;
+ double score = max / (double) k;
return new ClassificationResult<BytesRef>(assignedClass, score);
}
@@ -152,21 +152,21 @@ namespace Lucene.Net.Classification
/// <param name="textFieldNames">the names of the fields to be used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string[] textFieldNames, string classFieldName, Analyzer analyzer, Query query)
{
- _textFieldNames = textFieldNames;
- _classFieldName = classFieldName;
- _mlt = new MoreLikeThis(atomicReader);
- _mlt.Analyzer = analyzer;
- _mlt.FieldNames = _textFieldNames;
- _indexSearcher = new IndexSearcher(atomicReader);
- if (_minDocsFreq > 0)
+ this.textFieldNames = textFieldNames;
+ this.classFieldName = classFieldName;
+ mlt = new MoreLikeThis(atomicReader);
+ mlt.Analyzer = analyzer;
+ mlt.FieldNames = this.textFieldNames;
+ indexSearcher = new IndexSearcher(atomicReader);
+ if (minDocsFreq > 0)
{
- _mlt.MinDocFreq = _minDocsFreq;
+ mlt.MinDocFreq = minDocsFreq;
}
- if (_minTermFreq > 0)
+ if (minTermFreq > 0)
{
- _mlt.MinTermFreq = _minTermFreq;
+ mlt.MinTermFreq = minTermFreq;
}
- _query = query;
+ this.query = query;
}
}
}
\ No newline at end of file
diff --git a/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs b/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
index 98f60d5..9945efc 100644
--- a/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
+++ b/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis;
+using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Index;
using Lucene.Net.Search;
@@ -33,13 +33,13 @@ namespace Lucene.Net.Classification
/// </summary>
public class SimpleNaiveBayesClassifier : IClassifier<BytesRef>
{
- private AtomicReader _atomicReader;
- private string[] _textFieldNames;
- private string _classFieldName;
- private int _docsWithClassSize;
- private Analyzer _analyzer;
- private IndexSearcher _indexSearcher;
- private Query _query;
+ private AtomicReader atomicReader;
+ private string[] textFieldNames;
+ private string classFieldName;
+ private int docsWithClassSize;
+ private Analyzer analyzer;
+ private IndexSearcher indexSearcher;
+ private Query query;
/// <summary>
/// Creates a new NaiveBayes classifier.
@@ -81,30 +81,30 @@ namespace Lucene.Net.Classification
/// <param name="textFieldNames">the names of the fields to be used to compare documents</param>
public virtual void Train(AtomicReader atomicReader, string[] textFieldNames, string classFieldName, Analyzer analyzer, Query query)
{
- _atomicReader = atomicReader;
- _indexSearcher = new IndexSearcher(_atomicReader);
- _textFieldNames = textFieldNames;
- _classFieldName = classFieldName;
- _analyzer = analyzer;
- _query = query;
- _docsWithClassSize = CountDocsWithClass();
+ this.atomicReader = atomicReader;
+ indexSearcher = new IndexSearcher(this.atomicReader);
+ this.textFieldNames = textFieldNames;
+ this.classFieldName = classFieldName;
+ this.analyzer = analyzer;
+ this.query = query;
+ docsWithClassSize = CountDocsWithClass();
}
private int CountDocsWithClass()
{
- int docCount = MultiFields.GetTerms(_atomicReader, _classFieldName).DocCount;
+ int docCount = MultiFields.GetTerms(atomicReader, classFieldName).DocCount;
if (docCount == -1)
{ // in case codec doesn't support getDocCount
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
BooleanQuery q = new BooleanQuery
{
- new BooleanClause(new WildcardQuery(new Term(_classFieldName, WildcardQuery.WILDCARD_STRING.ToString())), Occur.MUST)
+ new BooleanClause(new WildcardQuery(new Term(classFieldName, WildcardQuery.WILDCARD_STRING.ToString())), Occur.MUST)
};
- if (_query != null)
+ if (query != null)
{
- q.Add(_query, Occur.MUST);
+ q.Add(query, Occur.MUST);
}
- _indexSearcher.Search(q, totalHitCountCollector);
+ indexSearcher.Search(q, totalHitCountCollector);
docCount = totalHitCountCollector.TotalHits;
}
return docCount;
@@ -113,8 +113,8 @@ namespace Lucene.Net.Classification
private string[] TokenizeDoc(string doc)
{
ICollection<string> result = new LinkedList<string>();
- foreach (string textFieldName in _textFieldNames) {
- TokenStream tokenStream = _analyzer.GetTokenStream(textFieldName, new StringReader(doc));
+ foreach (string textFieldName in textFieldNames) {
+ TokenStream tokenStream = analyzer.GetTokenStream(textFieldName, new StringReader(doc));
try
{
ICharTermAttribute charTermAttribute = tokenStream.AddAttribute<ICharTermAttribute>();
@@ -142,14 +142,14 @@ namespace Lucene.Net.Classification
/// <returns>a <see cref="ClassificationResult{BytesRef}"/> holding assigned class of type <see cref="BytesRef"/> and score</returns>
public virtual ClassificationResult<BytesRef> AssignClass(string inputDocument)
{
- if (_atomicReader == null)
+ if (atomicReader == null)
{
throw new IOException("You must first call Classifier#train");
}
double max = - double.MaxValue;
BytesRef foundClass = new BytesRef();
- Terms terms = MultiFields.GetTerms(_atomicReader, _classFieldName);
+ Terms terms = MultiFields.GetTerms(atomicReader, classFieldName);
TermsEnum termsEnum = terms.GetEnumerator();
BytesRef next;
string[] tokenizedDoc = TokenizeDoc(inputDocument);
@@ -181,7 +181,7 @@ namespace Lucene.Net.Classification
double num = hits + 1; // +1 is added because of add 1 smoothing
// den : for the whole dictionary, count the no of times a word appears in documents of class c (+|V|)
- double den = GetTextTermFreqForClass(c) + _docsWithClassSize;
+ double den = GetTextTermFreqForClass(c) + docsWithClassSize;
// P(w|c) = num/den
double wordProbability = num / den;
@@ -195,13 +195,13 @@ namespace Lucene.Net.Classification
private double GetTextTermFreqForClass(BytesRef c)
{
double avgNumberOfUniqueTerms = 0;
- foreach (string textFieldName in _textFieldNames)
+ foreach (string textFieldName in textFieldNames)
{
- Terms terms = MultiFields.GetTerms(_atomicReader, textFieldName);
+ Terms terms = MultiFields.GetTerms(atomicReader, textFieldName);
long numPostings = terms.SumDocFreq; // number of term/doc pairs
avgNumberOfUniqueTerms += numPostings / (double) terms.DocCount; // avg # of unique terms per doc
}
- int docsWithC = _atomicReader.DocFreq(new Term(_classFieldName, c));
+ int docsWithC = atomicReader.DocFreq(new Term(classFieldName, c));
return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
}
@@ -209,29 +209,29 @@ namespace Lucene.Net.Classification
{
BooleanQuery booleanQuery = new BooleanQuery();
BooleanQuery subQuery = new BooleanQuery();
- foreach (string textFieldName in _textFieldNames)
+ foreach (string textFieldName in textFieldNames)
{
subQuery.Add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), Occur.SHOULD));
}
booleanQuery.Add(new BooleanClause(subQuery, Occur.MUST));
- booleanQuery.Add(new BooleanClause(new TermQuery(new Term(_classFieldName, c)), Occur.MUST));
- if (_query != null)
+ booleanQuery.Add(new BooleanClause(new TermQuery(new Term(classFieldName, c)), Occur.MUST));
+ if (query != null)
{
- booleanQuery.Add(_query, Occur.MUST);
+ booleanQuery.Add(query, Occur.MUST);
}
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
- _indexSearcher.Search(booleanQuery, totalHitCountCollector);
+ indexSearcher.Search(booleanQuery, totalHitCountCollector);
return totalHitCountCollector.TotalHits;
}
private double CalculateLogPrior(BytesRef currentClass)
{
- return Math.Log((double) DocCount(currentClass)) - Math.Log(_docsWithClassSize);
+ return Math.Log((double) DocCount(currentClass)) - Math.Log(docsWithClassSize);
}
private int DocCount(BytesRef countedClass)
{
- return _atomicReader.DocFreq(new Term(_classFieldName, countedClass));
+ return atomicReader.DocFreq(new Term(classFieldName, countedClass));
}
}
}
\ No newline at end of file
diff --git a/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs b/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
index 702e363..c1a09e2 100644
--- a/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
+++ b/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis;
+using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
@@ -33,8 +33,8 @@ namespace Lucene.Net.Classification.Utils
public class DatasetSplitter
{
- private readonly double _crossValidationRatio;
- private readonly double _testRatio;
+ private readonly double crossValidationRatio;
+ private readonly double testRatio;
/// <summary>
/// Create a <see cref="DatasetSplitter"/> by giving test and cross validation IDXs sizes
@@ -43,8 +43,8 @@ namespace Lucene.Net.Classification.Utils
/// <param name="crossValidationRatio">the ratio of the original index to be used for the c.v. IDX as a <see cref="double"/> between 0.0 and 1.0</param>
public DatasetSplitter(double testRatio, double crossValidationRatio)
{
- this._crossValidationRatio = crossValidationRatio;
- this._testRatio = testRatio;
+ this.crossValidationRatio = crossValidationRatio;
+ this.testRatio = testRatio;
}
/// <summary>
@@ -127,11 +127,11 @@ namespace Lucene.Net.Classification.Utils
}
// add it to one of the IDXs
- if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
+ if (b % 2 == 0 && testWriter.MaxDoc < size * testRatio)
{
testWriter.AddDocument(doc);
}
- else if (cvWriter.MaxDoc < size * _crossValidationRatio)
+ else if (cvWriter.MaxDoc < size * crossValidationRatio)
{
cvWriter.AddDocument(doc);
}