You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/07 21:35:14 UTC

[2/2] lucenenet git commit: Some more work on Lucene.Net.Suggest

Some more work on Lucene.Net.Suggest


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6d26b3c7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6d26b3c7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6d26b3c7

Branch: refs/heads/master
Commit: 6d26b3c7ec96d8a683616c0117dc6d0241e0074a
Parents: 709445e
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Fri Nov 7 22:34:22 2014 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Fri Nov 7 22:34:22 2014 +0200

----------------------------------------------------------------------
 src/Lucene.Net.Core/Util/Fst/Util.cs            |  2 +-
 .../Spell/LuceneLevenshteinDistance.cs          | 21 ++---
 .../Suggest/Analyzing/FreeTextSuggester.cs      | 93 +++++++++-----------
 3 files changed, 52 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Core/Util/Fst/Util.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Fst/Util.cs b/src/Lucene.Net.Core/Util/Fst/Util.cs
index 01ee25a..4c15642 100644
--- a/src/Lucene.Net.Core/Util/Fst/Util.cs
+++ b/src/Lucene.Net.Core/Util/Fst/Util.cs
@@ -385,7 +385,7 @@ namespace Lucene.Net.Util.Fst
             }
 
             // If back plus this arc is competitive then add to queue:
-            protected internal virtual void AddIfCompetitive(FSTPath<T> path)
+            protected virtual void AddIfCompetitive(FSTPath<T> path)
             {
                 Debug.Assert(Queue != null);
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
index ebf0738..818ab64 100644
--- a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
+++ b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
@@ -1,4 +1,5 @@
 using System;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 
 namespace Lucene.Net.Search.Spell
@@ -50,7 +51,7 @@ namespace Lucene.Net.Search.Spell
         {
         }
 
-        public float getDistance(string target, string other)
+        public float GetDistance(string target, string other)
         {
             IntsRef targetPoints;
             IntsRef otherPoints;
@@ -64,8 +65,8 @@ namespace Lucene.Net.Search.Spell
             // in "distributed spellcheck", and its inefficient in other ways too...
 
             // cheaper to do this up front once
-            targetPoints = toIntsRef(target);
-            otherPoints = toIntsRef(other);
+            targetPoints = ToIntsRef(target);
+            otherPoints = ToIntsRef(other);
             n = targetPoints.Length;
             int m = otherPoints.Length;
 
@@ -104,15 +105,15 @@ namespace Lucene.Net.Search.Spell
 
             for (j = 1; j <= m; j++)
             {
-                t_j = otherPoints.ints[j - 1];
+                t_j = otherPoints.Ints[j - 1];
 
                 for (i = 1; i <= n; i++)
                 {
-                    cost = targetPoints.ints[i - 1] == t_j ? 0 : 1;
+                    cost = targetPoints.Ints[i - 1] == t_j ? 0 : 1;
                     // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
                     d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
                     // transposition
-                    if (i > 1 && j > 1 && targetPoints.ints[i - 1] == otherPoints.ints[j - 2] && targetPoints.ints[i - 2] == otherPoints.ints[j - 1])
+                    if (i > 1 && j > 1 && targetPoints.Ints[i - 1] == otherPoints.Ints[j - 2] && targetPoints.Ints[i - 2] == otherPoints.Ints[j - 1])
                     {
                         d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost);
                     }
@@ -122,13 +123,13 @@ namespace Lucene.Net.Search.Spell
             return 1.0f - ((float)d[n][m] / Math.Min(m, n));
         }
 
-        private static IntsRef toIntsRef(string s)
+        private static IntsRef ToIntsRef(string s)
         {
-            IntsRef @ref = new IntsRef(s.Length); // worst case
+            var @ref = new IntsRef(s.Length); // worst case
             int utf16Len = s.Length;
-            for (int i = 0, cp = 0; i < utf16Len; i += char.charCount(cp))
+            for (int i = 0, cp = 0; i < utf16Len; i += Character.CharCount(cp))
             {
-                cp = @ref.ints[@ref.length++] = char.codePointAt(s, i);
+                cp = @ref.Ints[@ref.Length++] = Character.CodePointAt(s, i);
             }
             return @ref;
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
index b430381..7a7aa40 100644
--- a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
+++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
@@ -13,6 +13,7 @@ using Lucene.Net.Util;
 using Lucene.Net.Util.Fst;
 using Directory = Lucene.Net.Store.Directory;
 using Version = Lucene.Net.Util.Version;
+using Util = Lucene.Net.Util.Fst.Util;
 
 namespace Lucene.Net.Search.Suggest.Analyzing
 {
@@ -254,7 +255,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		{
 		  // TODO: use ShingleAnalyzerWrapper?
 		  // Tack on ShingleFilter to the end, to generate token ngrams:
-		  return new AnalyzerWrapperAnonymousInnerClassHelper(this, other.ReuseStrategy, other);
+		  return new AnalyzerWrapperAnonymousInnerClassHelper(this, other.Strategy, other);
 		}
 	  }
 
@@ -263,7 +264,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		  private readonly FreeTextSuggester outerInstance;
 		  private readonly Analyzer other;
 
-		  public AnalyzerWrapperAnonymousInnerClassHelper(FreeTextSuggester outerInstance, UnknownType getReuseStrategy, Analyzer other) : base(getReuseStrategy)
+		  public AnalyzerWrapperAnonymousInnerClassHelper(FreeTextSuggester outerInstance, ReuseStrategy reuseStrategy, Analyzer other)
+              : base(reuseStrategy)
 		  {
 			  this.outerInstance = outerInstance;
 			  this.other = other;
@@ -321,11 +323,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		Directory dir = FSDirectory.Open(tempIndexPath);
 
 		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer);
-		iwc.OpenMode = IndexWriterConfig.OpenMode.CREATE;
+		iwc.OpenMode = IndexWriterConfig.OpenMode_e.CREATE;
 		iwc.RAMBufferSizeMB = ramBufferSizeMB;
 		IndexWriter writer = new IndexWriter(dir, iwc);
 
-		FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+		var ft = new FieldType(TextField.TYPE_NOT_STORED);
 		// TODO: if only we had IndexOptions.TERMS_ONLY...
 		ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
 		ft.OmitNorms = true;
@@ -385,7 +387,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			  totTokens += termsEnum.TotalTermFreq();
 			}
 
-			builder.Add(Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq()));
+			builder.Add(Lucene.Net.Util.Fst.Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq()));
 		  }
 
 		  fst = builder.Finish();
@@ -452,7 +454,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 	  {
 		CodecUtil.CheckHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
 		count = input.ReadVLong();
-		sbyte separatorOrig = input.ReadByte();
+		var separatorOrig = (sbyte)input.ReadByte();
 		if (separatorOrig != separator)
 		{
 		  throw new InvalidOperationException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig);
@@ -464,37 +466,29 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		}
 		totTokens = input.ReadVLong();
 
-		fst = new FST<>(input, PositiveIntOutputs.Singleton);
+		fst = new FST<long?>(input, PositiveIntOutputs.Singleton);
 
 		return true;
 	  }
 
-	  public override IList<LookupResult> Lookup(string key, bool onlyMorePopular, int num) // ignored
+	  public override IList<LookupResult> DoLookup(string key, bool onlyMorePopular, int num) // ignored
 	  {
-		return Lookup(key, null, onlyMorePopular, num);
+		return DoLookup(key, null, onlyMorePopular, num);
 	  }
 
 	  /// <summary>
 	  /// Lookup, without any context. </summary>
-	  public virtual IList<LookupResult> Lookup(string key, int num)
+	  public virtual IList<LookupResult> DoLookup(string key, int num)
 	  {
-		return Lookup(key, null, true, num);
+		return DoLookup(key, null, true, num);
 	  }
 
-	  public override IList<LookupResult> Lookup(string key, HashSet<BytesRef> contexts, bool onlyMorePopular, int num) // ignored
+	  public override IList<LookupResult> DoLookup(string key, HashSet<BytesRef> contexts, bool onlyMorePopular, int num) // ignored
 	  {
-		try
-		{
-		  return Lookup(key, contexts, num);
-		}
-		catch (IOException ioe)
-		{
-		  // bogus:
-		  throw new Exception(ioe);
-		}
+	      return Lookup(key, contexts, num);
 	  }
 
-	  public override long Count
+	    public override long Count
 	  {
 		  get
 		  {
@@ -604,9 +598,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			lastTokens[0] = new BytesRef();
 		  }
 
-		  FST.Arc<long?> arc = new FST.Arc<long?>();
+		  var arc = new FST.Arc<long?>();
 
-		  FST.BytesReader bytesReader = fst.BytesReader;
+		  var bytesReader = fst.BytesReader;
 
 		  // Try highest order models first, and if they return
 		  // results, return that; else, fallback:
@@ -645,14 +639,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			// match the prefix portion exactly
 			//Pair<Long,BytesRef> prefixOutput = null;
 			long? prefixOutput = null;
-			try
-			{
-			  prefixOutput = LookupPrefix(fst, bytesReader, token, arc);
-			}
-			catch (IOException bogus)
-			{
-			  throw new Exception(bogus);
-			}
+			prefixOutput = LookupPrefix(fst, bytesReader, token, arc);
 			//System.out.println("  prefixOutput=" + prefixOutput);
 
 			if (prefixOutput == null)
@@ -677,7 +664,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			  if (token.Bytes[token.Offset + i] == separator)
 			  {
 				BytesRef context = new BytesRef(token.Bytes, token.Offset, i);
-				long? output = Util.Get(fst, Util.ToIntsRef(context, new IntsRef()));
+				long? output = Util.Get(fst, Lucene.Net.Util.Fst.Util.ToIntsRef(context, new IntsRef()));
 				Debug.Assert(output != null);
 				contextCount = DecodeWeight(output);
 				lastTokenFragment = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1);
@@ -700,7 +687,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			CharsRef spare = new CharsRef();
 
 			// complete top-N
-			Util.TopResults<long?> completions = null;
+		      Util.Fst.Util.TopResults<long?> completions = null;
 			try
 			{
 
@@ -715,7 +702,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 
 			  // Must do num+seen.size() for queue depth because we may
 			  // reject up to seen.size() paths in acceptResult():
-			  Util.TopNSearcher<long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparator, seen, finalLastToken);
+			    Util.Fst.Util.TopNSearcher<long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparator, seen, finalLastToken);
 
 			  // since this search is initialized with a single start node 
 			  // it is okay to start with an empty input path here
@@ -734,11 +721,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			BytesRef suffix = new BytesRef(8);
 			//System.out.println("    " + completions.length + " completions");
 
-			  foreach (Util.Result<long?> completion in completions)
+			  foreach (Util.Fst.Util.Result<long?> completion in completions)
 			  {
 				token.Length = prefixLength;
 				// append suffix
-				Util.ToBytesRef(completion.Input, suffix);
+				Util.Fst.Util.ToBytesRef(completion.Input, suffix);
 				token.Append(suffix);
 
 				//System.out.println("    completion " + token.utf8ToString());
@@ -763,7 +750,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 				seen.Add(BytesRef.DeepCopyOf(lastToken));
 				spare.Grow(token.Length);
 				UnicodeUtil.UTF8toUTF16(token, spare);
-				LookupResult result = new LookupResult(spare.ToString(), (long)(long.MaxValue * backoff * ((double) decodeWeight(completion.Output)) / contextCount));
+				LookupResult result = new LookupResult(spare.ToString(), (long)(long.MaxValue * backoff * ((double) DecodeWeight(completion.Output)) / contextCount));
 				results.Add(result);
 				Debug.Assert(results.Count == seen.Count);
 				//System.out.println("  add result=" + result);
@@ -788,14 +775,14 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		}
 	  }
 
-	  private class TopNSearcherAnonymousInnerClassHelper : Util.TopNSearcher<long?>
+	  private class TopNSearcherAnonymousInnerClassHelper : Util.Fst.Util.TopNSearcher<long?>
 	  {
 		  private readonly FreeTextSuggester outerInstance;
 
 		  private HashSet<BytesRef> seen;
 		  private BytesRef finalLastToken;
 
-		  public TopNSearcherAnonymousInnerClassHelper<T1>(FreeTextSuggester outerInstance, FST<T1> org.apache.lucene.search.suggest.fst, int num, UnknownType size, UnknownType weightComparator, HashSet<BytesRef> seen, BytesRef finalLastToken) : base(org.apache.lucene.search.suggest.fst, num, size, weightComparator)
+		  public TopNSearcherAnonymousInnerClassHelper<T1>(FreeTextSuggester outerInstance, FST<T1> fst, int num, UnknownType size, UnknownType weightComparator, HashSet<BytesRef> seen, BytesRef finalLastToken) : base(org.apache.lucene.search.suggest.fst, num, size, weightComparator)
 		  {
 			  this.outerInstance = outerInstance;
 			  this.seen = seen;
@@ -804,11 +791,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		  }
 
 
-		  internal BytesRef scratchBytes;
+	      private BytesRef scratchBytes;
 
-		  protected internal override void addIfCompetitive(Util.FSTPath<long?> path)
+		  protected override void AddIfCompetitive(Util.Fst.Util.FSTPath<long?> path)
 		  {
-			if (path.Arc.label != outerInstance.separator)
+			if (path.Arc.Label != outerInstance.separator)
 			{
 			  //System.out.println("    keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc);
 			  base.AddIfCompetitive(path);
@@ -819,16 +806,16 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 			}
 		  }
 
-		  protected internal override bool AcceptResult(IntsRef input, long? output)
+		  protected override bool AcceptResult(IntsRef input, long? output)
 		  {
-			Util.ToBytesRef(input, scratchBytes);
+			Util.Fst.Util.ToBytesRef(input, scratchBytes);
 			finalLastToken.Grow(finalLastToken.length + scratchBytes.length);
 			int lenSav = finalLastToken.length;
-			finalLastToken.append(scratchBytes);
+			finalLastToken.Append(scratchBytes);
 			//System.out.println("    accept? input='" + scratchBytes.utf8ToString() + "'; lastToken='" + finalLastToken.utf8ToString() + "'; return " + (seen.contains(finalLastToken) == false));
 			bool ret = seen.Contains(finalLastToken) == false;
 
-			finalLastToken.length = lenSav;
+			finalLastToken.Length = lenSav;
 			return ret;
 		  }
 	  }
@@ -870,7 +857,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 	  /// <summary>
 	  /// cost -> weight </summary>
 	  //private long decodeWeight(Pair<Long,BytesRef> output) {
-	  private long DecodeWeight(long? output)
+	  private static long DecodeWeight(long? output)
 	  {
 		Debug.Assert(output != null);
 		return (int)(long.MaxValue - output);
@@ -880,7 +867,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 	  private long? LookupPrefix(FST<long?> fst, FST.BytesReader bytesReader, BytesRef scratch, FST.Arc<long?> arc) //Bogus
 	  {
 
-		long? output = fst.outputs.NoOutput;
+		long? output = fst.Outputs.NoOutput;
 
 		fst.GetFirstArc(arc);
 
@@ -895,7 +882,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 		  }
 		  else
 		  {
-			output = fst.outputs.add(output, arc.output);
+			output = fst.Outputs.Add(output, arc.Output);
 		  }
 		}
 
@@ -904,13 +891,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 
 	  internal static readonly IComparer<long?> weightComparator = new ComparatorAnonymousInnerClassHelper2();
 
-	  private class ComparatorAnonymousInnerClassHelper2 : IComparer<long?>
+	  private sealed class ComparatorAnonymousInnerClassHelper2 : IComparer<long?>
 	  {
 		  public ComparatorAnonymousInnerClassHelper2()
 		  {
 		  }
 
-		  public virtual int Compare(long? left, long? right)
+		  public int Compare(long? left, long? right)
 		  {
 			return left.CompareTo(right);
 		  }
@@ -922,7 +909,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
 	  /// </summary>
 	  public virtual object Get(string key)
 	  {
-		throw new System.NotSupportedException();
+		throw new NotSupportedException();
 	  }
 	}
 }
\ No newline at end of file