You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/07 21:35:14 UTC
[2/2] lucenenet git commit: Some more work on Lucene.Net.Suggest
Some more work on Lucene.Net.Suggest
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6d26b3c7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6d26b3c7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6d26b3c7
Branch: refs/heads/master
Commit: 6d26b3c7ec96d8a683616c0117dc6d0241e0074a
Parents: 709445e
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Fri Nov 7 22:34:22 2014 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Fri Nov 7 22:34:22 2014 +0200
----------------------------------------------------------------------
src/Lucene.Net.Core/Util/Fst/Util.cs | 2 +-
.../Spell/LuceneLevenshteinDistance.cs | 21 ++---
.../Suggest/Analyzing/FreeTextSuggester.cs | 93 +++++++++-----------
3 files changed, 52 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Core/Util/Fst/Util.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Fst/Util.cs b/src/Lucene.Net.Core/Util/Fst/Util.cs
index 01ee25a..4c15642 100644
--- a/src/Lucene.Net.Core/Util/Fst/Util.cs
+++ b/src/Lucene.Net.Core/Util/Fst/Util.cs
@@ -385,7 +385,7 @@ namespace Lucene.Net.Util.Fst
}
// If back plus this arc is competitive then add to queue:
- protected internal virtual void AddIfCompetitive(FSTPath<T> path)
+ protected virtual void AddIfCompetitive(FSTPath<T> path)
{
Debug.Assert(Queue != null);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
index ebf0738..818ab64 100644
--- a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
+++ b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
@@ -1,4 +1,5 @@
using System;
+using Lucene.Net.Support;
using Lucene.Net.Util;
namespace Lucene.Net.Search.Spell
@@ -50,7 +51,7 @@ namespace Lucene.Net.Search.Spell
{
}
- public float getDistance(string target, string other)
+ public float GetDistance(string target, string other)
{
IntsRef targetPoints;
IntsRef otherPoints;
@@ -64,8 +65,8 @@ namespace Lucene.Net.Search.Spell
// in "distributed spellcheck", and it's inefficient in other ways too...
// cheaper to do this up front once
- targetPoints = toIntsRef(target);
- otherPoints = toIntsRef(other);
+ targetPoints = ToIntsRef(target);
+ otherPoints = ToIntsRef(other);
n = targetPoints.Length;
int m = otherPoints.Length;
@@ -104,15 +105,15 @@ namespace Lucene.Net.Search.Spell
for (j = 1; j <= m; j++)
{
- t_j = otherPoints.ints[j - 1];
+ t_j = otherPoints.Ints[j - 1];
for (i = 1; i <= n; i++)
{
- cost = targetPoints.ints[i - 1] == t_j ? 0 : 1;
+ cost = targetPoints.Ints[i - 1] == t_j ? 0 : 1;
// minimum of cell to the left+1, to the top+1, diagonally left and up +cost
d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
// transposition
- if (i > 1 && j > 1 && targetPoints.ints[i - 1] == otherPoints.ints[j - 2] && targetPoints.ints[i - 2] == otherPoints.ints[j - 1])
+ if (i > 1 && j > 1 && targetPoints.Ints[i - 1] == otherPoints.Ints[j - 2] && targetPoints.Ints[i - 2] == otherPoints.Ints[j - 1])
{
d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost);
}
@@ -122,13 +123,13 @@ namespace Lucene.Net.Search.Spell
return 1.0f - ((float)d[n][m] / Math.Min(m, n));
}
- private static IntsRef toIntsRef(string s)
+ private static IntsRef ToIntsRef(string s)
{
- IntsRef @ref = new IntsRef(s.Length); // worst case
+ var @ref = new IntsRef(s.Length); // worst case
int utf16Len = s.Length;
- for (int i = 0, cp = 0; i < utf16Len; i += char.charCount(cp))
+ for (int i = 0, cp = 0; i < utf16Len; i += Character.CharCount(cp))
{
- cp = @ref.ints[@ref.length++] = char.codePointAt(s, i);
+ cp = @ref.Ints[@ref.Length++] = Character.CodePointAt(s, i);
}
return @ref;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d26b3c7/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
index b430381..7a7aa40 100644
--- a/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
+++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/FreeTextSuggester.cs
@@ -13,6 +13,7 @@ using Lucene.Net.Util;
using Lucene.Net.Util.Fst;
using Directory = Lucene.Net.Store.Directory;
using Version = Lucene.Net.Util.Version;
+using Util = Lucene.Net.Util.Fst.Util;
namespace Lucene.Net.Search.Suggest.Analyzing
{
@@ -254,7 +255,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
{
// TODO: use ShingleAnalyzerWrapper?
// Tack on ShingleFilter to the end, to generate token ngrams:
- return new AnalyzerWrapperAnonymousInnerClassHelper(this, other.ReuseStrategy, other);
+ return new AnalyzerWrapperAnonymousInnerClassHelper(this, other.Strategy, other);
}
}
@@ -263,7 +264,8 @@ namespace Lucene.Net.Search.Suggest.Analyzing
private readonly FreeTextSuggester outerInstance;
private readonly Analyzer other;
- public AnalyzerWrapperAnonymousInnerClassHelper(FreeTextSuggester outerInstance, UnknownType getReuseStrategy, Analyzer other) : base(getReuseStrategy)
+ public AnalyzerWrapperAnonymousInnerClassHelper(FreeTextSuggester outerInstance, ReuseStrategy reuseStrategy, Analyzer other)
+ : base(reuseStrategy)
{
this.outerInstance = outerInstance;
this.other = other;
@@ -321,11 +323,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing
Directory dir = FSDirectory.Open(tempIndexPath);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer);
- iwc.OpenMode = IndexWriterConfig.OpenMode.CREATE;
+ iwc.OpenMode = IndexWriterConfig.OpenMode_e.CREATE;
iwc.RAMBufferSizeMB = ramBufferSizeMB;
IndexWriter writer = new IndexWriter(dir, iwc);
- FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+ var ft = new FieldType(TextField.TYPE_NOT_STORED);
// TODO: if only we had IndexOptions.TERMS_ONLY...
ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
ft.OmitNorms = true;
@@ -385,7 +387,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
totTokens += termsEnum.TotalTermFreq();
}
- builder.Add(Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq()));
+ builder.Add(Lucene.Net.Util.Fst.Util.ToIntsRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq()));
}
fst = builder.Finish();
@@ -452,7 +454,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
{
CodecUtil.CheckHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
count = input.ReadVLong();
- sbyte separatorOrig = input.ReadByte();
+ var separatorOrig = (sbyte)input.ReadByte();
if (separatorOrig != separator)
{
throw new InvalidOperationException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig);
@@ -464,37 +466,29 @@ namespace Lucene.Net.Search.Suggest.Analyzing
}
totTokens = input.ReadVLong();
- fst = new FST<>(input, PositiveIntOutputs.Singleton);
+ fst = new FST<long?>(input, PositiveIntOutputs.Singleton);
return true;
}
- public override IList<LookupResult> Lookup(string key, bool onlyMorePopular, int num) // ignored
+ public override IList<LookupResult> DoLookup(string key, bool onlyMorePopular, int num) // ignored
{
- return Lookup(key, null, onlyMorePopular, num);
+ return DoLookup(key, null, onlyMorePopular, num);
}
/// <summary>
/// Lookup, without any context. </summary>
- public virtual IList<LookupResult> Lookup(string key, int num)
+ public virtual IList<LookupResult> DoLookup(string key, int num)
{
- return Lookup(key, null, true, num);
+ return DoLookup(key, null, true, num);
}
- public override IList<LookupResult> Lookup(string key, HashSet<BytesRef> contexts, bool onlyMorePopular, int num) // ignored
+ public override IList<LookupResult> DoLookup(string key, HashSet<BytesRef> contexts, bool onlyMorePopular, int num) // ignored
{
- try
- {
- return Lookup(key, contexts, num);
- }
- catch (IOException ioe)
- {
- // bogus:
- throw new Exception(ioe);
- }
+ return Lookup(key, contexts, num);
}
- public override long Count
+ public override long Count
{
get
{
@@ -604,9 +598,9 @@ namespace Lucene.Net.Search.Suggest.Analyzing
lastTokens[0] = new BytesRef();
}
- FST.Arc<long?> arc = new FST.Arc<long?>();
+ var arc = new FST.Arc<long?>();
- FST.BytesReader bytesReader = fst.BytesReader;
+ var bytesReader = fst.BytesReader;
// Try highest order models first, and if they return
// results, return that; else, fallback:
@@ -645,14 +639,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
// match the prefix portion exactly
//Pair<Long,BytesRef> prefixOutput = null;
long? prefixOutput = null;
- try
- {
- prefixOutput = LookupPrefix(fst, bytesReader, token, arc);
- }
- catch (IOException bogus)
- {
- throw new Exception(bogus);
- }
+ prefixOutput = LookupPrefix(fst, bytesReader, token, arc);
//System.out.println(" prefixOutput=" + prefixOutput);
if (prefixOutput == null)
@@ -677,7 +664,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
if (token.Bytes[token.Offset + i] == separator)
{
BytesRef context = new BytesRef(token.Bytes, token.Offset, i);
- long? output = Util.Get(fst, Util.ToIntsRef(context, new IntsRef()));
+ long? output = Util.Get(fst, Lucene.Net.Util.Fst.Util.ToIntsRef(context, new IntsRef()));
Debug.Assert(output != null);
contextCount = DecodeWeight(output);
lastTokenFragment = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1);
@@ -700,7 +687,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
CharsRef spare = new CharsRef();
// complete top-N
- Util.TopResults<long?> completions = null;
+ Util.Fst.Util.TopResults<long?> completions = null;
try
{
@@ -715,7 +702,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
// Must do num+seen.size() for queue depth because we may
// reject up to seen.size() paths in acceptResult():
- Util.TopNSearcher<long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparator, seen, finalLastToken);
+ Util.Fst.Util.TopNSearcher<long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparator, seen, finalLastToken);
// since this search is initialized with a single start node
// it is okay to start with an empty input path here
@@ -734,11 +721,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing
BytesRef suffix = new BytesRef(8);
//System.out.println(" " + completions.length + " completions");
- foreach (Util.Result<long?> completion in completions)
+ foreach (Util.Fst.Util.Result<long?> completion in completions)
{
token.Length = prefixLength;
// append suffix
- Util.ToBytesRef(completion.Input, suffix);
+ Util.Fst.Util.ToBytesRef(completion.Input, suffix);
token.Append(suffix);
//System.out.println(" completion " + token.utf8ToString());
@@ -763,7 +750,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
seen.Add(BytesRef.DeepCopyOf(lastToken));
spare.Grow(token.Length);
UnicodeUtil.UTF8toUTF16(token, spare);
- LookupResult result = new LookupResult(spare.ToString(), (long)(long.MaxValue * backoff * ((double) decodeWeight(completion.Output)) / contextCount));
+ LookupResult result = new LookupResult(spare.ToString(), (long)(long.MaxValue * backoff * ((double) DecodeWeight(completion.Output)) / contextCount));
results.Add(result);
Debug.Assert(results.Count == seen.Count);
//System.out.println(" add result=" + result);
@@ -788,14 +775,14 @@ namespace Lucene.Net.Search.Suggest.Analyzing
}
}
- private class TopNSearcherAnonymousInnerClassHelper : Util.TopNSearcher<long?>
+ private class TopNSearcherAnonymousInnerClassHelper : Util.Fst.Util.TopNSearcher<long?>
{
private readonly FreeTextSuggester outerInstance;
private HashSet<BytesRef> seen;
private BytesRef finalLastToken;
- public TopNSearcherAnonymousInnerClassHelper<T1>(FreeTextSuggester outerInstance, FST<T1> org.apache.lucene.search.suggest.fst, int num, UnknownType size, UnknownType weightComparator, HashSet<BytesRef> seen, BytesRef finalLastToken) : base(org.apache.lucene.search.suggest.fst, num, size, weightComparator)
+ public TopNSearcherAnonymousInnerClassHelper<T1>(FreeTextSuggester outerInstance, FST<T1> fst, int num, UnknownType size, UnknownType weightComparator, HashSet<BytesRef> seen, BytesRef finalLastToken) : base(org.apache.lucene.search.suggest.fst, num, size, weightComparator)
{
this.outerInstance = outerInstance;
this.seen = seen;
@@ -804,11 +791,11 @@ namespace Lucene.Net.Search.Suggest.Analyzing
}
- internal BytesRef scratchBytes;
+ private BytesRef scratchBytes;
- protected internal override void addIfCompetitive(Util.FSTPath<long?> path)
+ protected override void AddIfCompetitive(Util.Fst.Util.FSTPath<long?> path)
{
- if (path.Arc.label != outerInstance.separator)
+ if (path.Arc.Label != outerInstance.separator)
{
//System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc);
base.AddIfCompetitive(path);
@@ -819,16 +806,16 @@ namespace Lucene.Net.Search.Suggest.Analyzing
}
}
- protected internal override bool AcceptResult(IntsRef input, long? output)
+ protected override bool AcceptResult(IntsRef input, long? output)
{
- Util.ToBytesRef(input, scratchBytes);
+ Util.Fst.Util.ToBytesRef(input, scratchBytes);
finalLastToken.Grow(finalLastToken.length + scratchBytes.length);
int lenSav = finalLastToken.length;
- finalLastToken.append(scratchBytes);
+ finalLastToken.Append(scratchBytes);
//System.out.println(" accept? input='" + scratchBytes.utf8ToString() + "'; lastToken='" + finalLastToken.utf8ToString() + "'; return " + (seen.contains(finalLastToken) == false));
bool ret = seen.Contains(finalLastToken) == false;
- finalLastToken.length = lenSav;
+ finalLastToken.Length = lenSav;
return ret;
}
}
@@ -870,7 +857,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
/// <summary>
/// cost -> weight </summary>
//private long decodeWeight(Pair<Long,BytesRef> output) {
- private long DecodeWeight(long? output)
+ private static long DecodeWeight(long? output)
{
Debug.Assert(output != null);
return (int)(long.MaxValue - output);
@@ -880,7 +867,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
private long? LookupPrefix(FST<long?> fst, FST.BytesReader bytesReader, BytesRef scratch, FST.Arc<long?> arc) //Bogus
{
- long? output = fst.outputs.NoOutput;
+ long? output = fst.Outputs.NoOutput;
fst.GetFirstArc(arc);
@@ -895,7 +882,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
}
else
{
- output = fst.outputs.add(output, arc.output);
+ output = fst.Outputs.Add(output, arc.Output);
}
}
@@ -904,13 +891,13 @@ namespace Lucene.Net.Search.Suggest.Analyzing
internal static readonly IComparer<long?> weightComparator = new ComparatorAnonymousInnerClassHelper2();
- private class ComparatorAnonymousInnerClassHelper2 : IComparer<long?>
+ private sealed class ComparatorAnonymousInnerClassHelper2 : IComparer<long?>
{
public ComparatorAnonymousInnerClassHelper2()
{
}
- public virtual int Compare(long? left, long? right)
+ public int Compare(long? left, long? right)
{
return left.CompareTo(right);
}
@@ -922,7 +909,7 @@ namespace Lucene.Net.Search.Suggest.Analyzing
/// </summary>
public virtual object Get(string key)
{
- throw new System.NotSupportedException();
+ throw new NotSupportedException();
}
}
}
\ No newline at end of file