You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2021/02/15 21:51:05 UTC

[lucenenet] branch master updated: Lucene.Net.Classification: Removed leading underscore from member vars in several classes (#422)

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git


The following commit(s) were added to refs/heads/master by this push:
     new 84a1329  Lucene.Net.Classification: Removed leading underscore from member vars in several classes (#422)
84a1329 is described below

commit 84a132991e3333e2b025ca33da3847d27c144244
Author: Ron Clabo <rc...@users.noreply.github.com>
AuthorDate: Mon Feb 15 16:51:00 2021 -0500

    Lucene.Net.Classification: Removed leading underscore from member vars in several classes (#422)
---
 .../ClassificationResult.cs                        | 14 ++---
 .../KNearestNeighborClassifier.cs                  | 66 ++++++++++----------
 .../SimpleNaiveBayesClassifier.cs                  | 70 +++++++++++-----------
 .../Utils/DatasetSplitter.cs                       | 14 ++---
 4 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/src/Lucene.Net.Classification/ClassificationResult.cs b/src/Lucene.Net.Classification/ClassificationResult.cs
index 8744efb..e90f178 100644
--- a/src/Lucene.Net.Classification/ClassificationResult.cs
+++ b/src/Lucene.Net.Classification/ClassificationResult.cs
@@ -1,4 +1,4 @@
-namespace Lucene.Net.Classification
+namespace Lucene.Net.Classification
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,8 +24,8 @@ namespace Lucene.Net.Classification
     public class ClassificationResult<T>
     {
 
-        private readonly T _assignedClass;
-        private readonly double _score;
+        private readonly T assignedClass;
+        private readonly double score;
 
         /// <summary>
         /// Constructor
@@ -34,19 +34,19 @@ namespace Lucene.Net.Classification
         /// </summary>
         public ClassificationResult(T assignedClass, double score) 
         {
-            _assignedClass = assignedClass;
-            _score = score;
+            this.assignedClass = assignedClass;
+            this.score = score;
         }
 
         /// <summary>
         /// retrieve the result class
         /// @return a <typeparamref name="T"/> representing an assigned class
         /// </summary>
-        public virtual T AssignedClass => _assignedClass;
+        public virtual T AssignedClass => assignedClass;
 
         /// <summary>
         /// Gets a <see cref="double"/> representing a result score.
         /// </summary>
-        public virtual double Score => _score;
+        public virtual double Score => score;
     }
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs b/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
index 9f198e5..b6970bd 100644
--- a/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
+++ b/src/Lucene.Net.Classification/KNearestNeighborClassifier.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis;
+using Lucene.Net.Analysis;
 using Lucene.Net.Index;
 using Lucene.Net.Queries.Mlt;
 using Lucene.Net.Search;
@@ -34,21 +34,21 @@ namespace Lucene.Net.Classification
     public class KNearestNeighborClassifier : IClassifier<BytesRef>
     {
 
-        private MoreLikeThis _mlt;
-        private string[] _textFieldNames;
-        private string _classFieldName;
-        private IndexSearcher _indexSearcher;
-        private readonly int _k;
-        private Query _query;
+        private MoreLikeThis mlt;
+        private string[] textFieldNames;
+        private string classFieldName;
+        private IndexSearcher indexSearcher;
+        private readonly int k;
+        private Query query;
 
-        private readonly int _minDocsFreq; // LUCENENET: marked readonly
-        private readonly int _minTermFreq; // LUCENENET: marked readonly
+        private readonly int minDocsFreq; // LUCENENET: marked readonly
+        private readonly int minTermFreq; // LUCENENET: marked readonly
 
         /// <summary>Create a <see cref="IClassifier{T}"/> using kNN algorithm</summary>
         /// <param name="k">the number of neighbors to analyze as an <see cref="int"/></param>
         public KNearestNeighborClassifier(int k)
         {
-            _k = k;
+            this.k = k;
         }
 
         /// <summary>Create a <see cref="IClassifier{T}"/> using kNN algorithm</summary>
@@ -57,9 +57,9 @@ namespace Lucene.Net.Classification
         /// <param name="minTermFreq">the minimum number of term frequency for MLT to be set with <see cref="MoreLikeThis.MinTermFreq"/></param>
         public KNearestNeighborClassifier(int k, int minDocsFreq, int minTermFreq)
         {
-            _k = k;
-            _minDocsFreq = minDocsFreq;
-            _minTermFreq = minTermFreq;
+            this.k = k;
+            this.minDocsFreq = minDocsFreq;
+            this.minTermFreq = minTermFreq;
         }
 
         /// <summary>
@@ -69,23 +69,23 @@ namespace Lucene.Net.Classification
         /// <returns>a <see cref="ClassificationResult{BytesRef}"/> holding assigned class of type <see cref="BytesRef"/> and score</returns>
         public virtual ClassificationResult<BytesRef> AssignClass(string text)
         {
-            if (_mlt == null)
+            if (mlt == null)
             {
                 throw new IOException("You must first call Classifier#train");
             }
 
             BooleanQuery mltQuery = new BooleanQuery();
-            foreach (string textFieldName in _textFieldNames)
+            foreach (string textFieldName in textFieldNames)
             {
-                mltQuery.Add(new BooleanClause(_mlt.Like(new StringReader(text), textFieldName), Occur.SHOULD));
+                mltQuery.Add(new BooleanClause(mlt.Like(new StringReader(text), textFieldName), Occur.SHOULD));
             }
-            Query classFieldQuery = new WildcardQuery(new Term(_classFieldName, "*"));
+            Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
             mltQuery.Add(new BooleanClause(classFieldQuery, Occur.MUST));
-            if (_query != null)
+            if (query != null)
             {
-                mltQuery.Add(_query, Occur.MUST);
+                mltQuery.Add(query, Occur.MUST);
             }
-            TopDocs topDocs = _indexSearcher.Search(mltQuery, _k);
+            TopDocs topDocs = indexSearcher.Search(mltQuery, k);
             return SelectClassFromNeighbors(topDocs);
         }
 
@@ -96,7 +96,7 @@ namespace Lucene.Net.Classification
 
             foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
             {
-                BytesRef cl = new BytesRef(_indexSearcher.Doc(scoreDoc.Doc).GetField(_classFieldName).GetStringValue());
+                BytesRef cl = new BytesRef(indexSearcher.Doc(scoreDoc.Doc).GetField(classFieldName).GetStringValue());
                 if (classCounts.TryGetValue(cl, out int value))
                 {
                     classCounts[cl] = value + 1;
@@ -117,7 +117,7 @@ namespace Lucene.Net.Classification
                     assignedClass = (BytesRef)entry.Key.Clone();
                 }
             }
-            double score = max / (double) _k;
+            double score = max / (double) k;
             return new ClassificationResult<BytesRef>(assignedClass, score);
         }
 
@@ -152,21 +152,21 @@ namespace Lucene.Net.Classification
         /// <param name="textFieldNames">the names of the fields to be used to compare documents</param>
         public virtual void Train(AtomicReader atomicReader, string[] textFieldNames, string classFieldName, Analyzer analyzer, Query query)
         {
-            _textFieldNames = textFieldNames;
-            _classFieldName = classFieldName;
-            _mlt = new MoreLikeThis(atomicReader);
-            _mlt.Analyzer = analyzer;
-            _mlt.FieldNames = _textFieldNames;
-            _indexSearcher = new IndexSearcher(atomicReader);
-            if (_minDocsFreq > 0)
+            this.textFieldNames = textFieldNames;
+            this.classFieldName = classFieldName;
+            mlt = new MoreLikeThis(atomicReader);
+            mlt.Analyzer = analyzer;
+            mlt.FieldNames = this.textFieldNames;
+            indexSearcher = new IndexSearcher(atomicReader);
+            if (minDocsFreq > 0)
             {
-                _mlt.MinDocFreq = _minDocsFreq;
+                mlt.MinDocFreq = minDocsFreq;
             }
-            if (_minTermFreq > 0)
+            if (minTermFreq > 0)
             {
-                _mlt.MinTermFreq = _minTermFreq;
+                mlt.MinTermFreq = minTermFreq;
             }
-            _query = query;
+            this.query = query;
         }
     }
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs b/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
index 98f60d5..9945efc 100644
--- a/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
+++ b/src/Lucene.Net.Classification/SimpleNaiveBayesClassifier.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis;
+using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Index;
 using Lucene.Net.Search;
@@ -33,13 +33,13 @@ namespace Lucene.Net.Classification
     /// </summary>
     public class SimpleNaiveBayesClassifier : IClassifier<BytesRef>
     {
-        private AtomicReader _atomicReader;
-        private string[] _textFieldNames;
-        private string _classFieldName;
-        private int _docsWithClassSize;
-        private Analyzer _analyzer;
-        private IndexSearcher _indexSearcher;
-        private Query _query;
+        private AtomicReader atomicReader;
+        private string[] textFieldNames;
+        private string classFieldName;
+        private int docsWithClassSize;
+        private Analyzer analyzer;
+        private IndexSearcher indexSearcher;
+        private Query query;
 
         /// <summary>
         /// Creates a new NaiveBayes classifier.
@@ -81,30 +81,30 @@ namespace Lucene.Net.Classification
         /// <param name="textFieldNames">the names of the fields to be used to compare documents</param>
         public virtual void Train(AtomicReader atomicReader, string[] textFieldNames, string classFieldName, Analyzer analyzer, Query query)
         {
-            _atomicReader = atomicReader;
-            _indexSearcher = new IndexSearcher(_atomicReader);
-            _textFieldNames = textFieldNames;
-            _classFieldName = classFieldName;
-            _analyzer = analyzer;
-            _query = query;
-            _docsWithClassSize = CountDocsWithClass();
+            this.atomicReader = atomicReader;
+            indexSearcher = new IndexSearcher(this.atomicReader);
+            this.textFieldNames = textFieldNames;
+            this.classFieldName = classFieldName;
+            this.analyzer = analyzer;
+            this.query = query;
+            docsWithClassSize = CountDocsWithClass();
         }
 
         private int CountDocsWithClass() 
         {
-            int docCount = MultiFields.GetTerms(_atomicReader, _classFieldName).DocCount;
+            int docCount = MultiFields.GetTerms(atomicReader, classFieldName).DocCount;
             if (docCount == -1) 
             { // in case codec doesn't support getDocCount
                 TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
                 BooleanQuery q = new BooleanQuery
                 {
-                    new BooleanClause(new WildcardQuery(new Term(_classFieldName, WildcardQuery.WILDCARD_STRING.ToString())), Occur.MUST)
+                    new BooleanClause(new WildcardQuery(new Term(classFieldName, WildcardQuery.WILDCARD_STRING.ToString())), Occur.MUST)
                 };
-                if (_query != null) 
+                if (query != null) 
                 {
-                    q.Add(_query, Occur.MUST);
+                    q.Add(query, Occur.MUST);
                 }
-                _indexSearcher.Search(q, totalHitCountCollector);
+                indexSearcher.Search(q, totalHitCountCollector);
                 docCount = totalHitCountCollector.TotalHits;
             }
             return docCount;
@@ -113,8 +113,8 @@ namespace Lucene.Net.Classification
         private string[] TokenizeDoc(string doc)
         {
             ICollection<string> result = new LinkedList<string>();
-            foreach (string textFieldName in _textFieldNames) {
-                TokenStream tokenStream = _analyzer.GetTokenStream(textFieldName, new StringReader(doc));
+            foreach (string textFieldName in textFieldNames) {
+                TokenStream tokenStream = analyzer.GetTokenStream(textFieldName, new StringReader(doc));
                 try 
                 {
                     ICharTermAttribute charTermAttribute = tokenStream.AddAttribute<ICharTermAttribute>();
@@ -142,14 +142,14 @@ namespace Lucene.Net.Classification
         /// <returns>a <see cref="ClassificationResult{BytesRef}"/> holding assigned class of type <see cref="BytesRef"/> and score</returns>
         public virtual ClassificationResult<BytesRef> AssignClass(string inputDocument) 
         {
-            if (_atomicReader == null) 
+            if (atomicReader == null) 
             {
                 throw new IOException("You must first call Classifier#train");
             }
             double max = - double.MaxValue;
             BytesRef foundClass = new BytesRef();
 
-            Terms terms = MultiFields.GetTerms(_atomicReader, _classFieldName);
+            Terms terms = MultiFields.GetTerms(atomicReader, classFieldName);
             TermsEnum termsEnum = terms.GetEnumerator();
             BytesRef next;
             string[] tokenizedDoc = TokenizeDoc(inputDocument);
@@ -181,7 +181,7 @@ namespace Lucene.Net.Classification
                 double num = hits + 1; // +1 is added because of add 1 smoothing
 
                 // den : for the whole dictionary, count the no of times a word appears in documents of class c (+|V|)
-                double den = GetTextTermFreqForClass(c) + _docsWithClassSize;
+                double den = GetTextTermFreqForClass(c) + docsWithClassSize;
 
                 // P(w|c) = num/den
                 double wordProbability = num / den;
@@ -195,13 +195,13 @@ namespace Lucene.Net.Classification
         private double GetTextTermFreqForClass(BytesRef c)
         {
             double avgNumberOfUniqueTerms = 0;
-            foreach (string textFieldName in _textFieldNames) 
+            foreach (string textFieldName in textFieldNames) 
             {
-                Terms terms = MultiFields.GetTerms(_atomicReader, textFieldName);
+                Terms terms = MultiFields.GetTerms(atomicReader, textFieldName);
                 long numPostings = terms.SumDocFreq; // number of term/doc pairs
                 avgNumberOfUniqueTerms += numPostings / (double) terms.DocCount; // avg # of unique terms per doc
             }
-            int docsWithC = _atomicReader.DocFreq(new Term(_classFieldName, c));
+            int docsWithC = atomicReader.DocFreq(new Term(classFieldName, c));
             return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
         }
 
@@ -209,29 +209,29 @@ namespace Lucene.Net.Classification
         {
             BooleanQuery booleanQuery = new BooleanQuery();
             BooleanQuery subQuery = new BooleanQuery();
-            foreach (string textFieldName in _textFieldNames) 
+            foreach (string textFieldName in textFieldNames) 
             {
                 subQuery.Add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), Occur.SHOULD));
             }
             booleanQuery.Add(new BooleanClause(subQuery, Occur.MUST));
-            booleanQuery.Add(new BooleanClause(new TermQuery(new Term(_classFieldName, c)), Occur.MUST));
-            if (_query != null) 
+            booleanQuery.Add(new BooleanClause(new TermQuery(new Term(classFieldName, c)), Occur.MUST));
+            if (query != null) 
             {
-                booleanQuery.Add(_query, Occur.MUST);
+                booleanQuery.Add(query, Occur.MUST);
             }
             TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
-            _indexSearcher.Search(booleanQuery, totalHitCountCollector);
+            indexSearcher.Search(booleanQuery, totalHitCountCollector);
             return totalHitCountCollector.TotalHits;
         }
 
         private double CalculateLogPrior(BytesRef currentClass)
         {
-            return Math.Log((double) DocCount(currentClass)) - Math.Log(_docsWithClassSize);
+            return Math.Log((double) DocCount(currentClass)) - Math.Log(docsWithClassSize);
         }
 
         private int DocCount(BytesRef countedClass) 
         {
-            return _atomicReader.DocFreq(new Term(_classFieldName, countedClass));
+            return atomicReader.DocFreq(new Term(classFieldName, countedClass));
         }
     }   
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs b/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
index 702e363..c1a09e2 100644
--- a/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
+++ b/src/Lucene.Net.Classification/Utils/DatasetSplitter.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis;
+using Lucene.Net.Analysis;
 using Lucene.Net.Documents;
 using Lucene.Net.Index;
 using Lucene.Net.Search;
@@ -33,8 +33,8 @@ namespace Lucene.Net.Classification.Utils
     public class DatasetSplitter
     {
 
-        private readonly double _crossValidationRatio;
-        private readonly double _testRatio;
+        private readonly double crossValidationRatio;
+        private readonly double testRatio;
 
         /// <summary>
         /// Create a <see cref="DatasetSplitter"/> by giving test and cross validation IDXs sizes
@@ -43,8 +43,8 @@ namespace Lucene.Net.Classification.Utils
         /// <param name="crossValidationRatio">the ratio of the original index to be used for the c.v. IDX as a <see cref="double"/> between 0.0 and 1.0</param>
         public DatasetSplitter(double testRatio, double crossValidationRatio)
         {
-            this._crossValidationRatio = crossValidationRatio;
-            this._testRatio = testRatio;
+            this.crossValidationRatio = crossValidationRatio;
+            this.testRatio = testRatio;
         }
 
         /// <summary>
@@ -127,11 +127,11 @@ namespace Lucene.Net.Classification.Utils
                     }
 
                     // add it to one of the IDXs
-                    if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
+                    if (b % 2 == 0 && testWriter.MaxDoc < size * testRatio)
                     {
                         testWriter.AddDocument(doc);
                     }
-                    else if (cvWriter.MaxDoc < size * _crossValidationRatio)
+                    else if (cvWriter.MaxDoc < size * crossValidationRatio)
                     {
                         cvWriter.AddDocument(doc);
                     }