You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/26 23:37:53 UTC
[65/72] [abbrv] lucenenet git commit: Lucene.Net.TestFramework:
Renamed Util\fst\ to Util\Fst\
Lucene.Net.TestFramework: Renamed Util\fst\ to Util\Fst\
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/84ad7a30
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/84ad7a30
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/84ad7a30
Branch: refs/heads/api-work
Commit: 84ad7a307d0376599bcf0be1895f212268044681
Parents: 6a55c21
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 26 03:40:33 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Feb 27 06:18:00 2017 +0700
----------------------------------------------------------------------
.../Lucene.Net.TestFramework.csproj | 2 +-
.../Util/Fst/FSTTester.cs | 1013 ++++++++++++++++++
.../Util/fst/FSTTester.cs | 1013 ------------------
3 files changed, 1014 insertions(+), 1014 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/84ad7a30/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
index 788338e..c7b9446 100644
--- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
+++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
@@ -446,7 +446,7 @@
<Compile Include="Util\FailOnNonBulkMergesInfoStream.cs">
<SubType>Code</SubType>
</Compile>
- <Compile Include="Util\fst\FSTTester.cs">
+ <Compile Include="Util\Fst\FSTTester.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Util\LineFileDocs.cs">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/84ad7a30/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
new file mode 100644
index 0000000..46c6f61
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
@@ -0,0 +1,1013 @@
+using Lucene.Net.Randomized.Generators;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Util.Fst
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Directory = Lucene.Net.Store.Directory;
+ using IndexInput = Lucene.Net.Store.IndexInput;
+ using IndexOutput = Lucene.Net.Store.IndexOutput;
+ using IOContext = Lucene.Net.Store.IOContext;
+ using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s;
+
+ /// <summary>
+ /// Helper class to test FSTs. </summary>
+ public class FSTTester<T>
+ {
+ internal readonly Random Random;
+ internal readonly List<InputOutput<T>> Pairs;
+ internal readonly int InputMode;
+ internal readonly Outputs<T> Outputs;
+ internal readonly Directory Dir;
+ internal readonly bool DoReverseLookup;
+
+ public FSTTester(Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs, bool doReverseLookup)
+ {
+ this.Random = random;
+ this.Dir = dir;
+ this.InputMode = inputMode;
+ this.Pairs = pairs;
+ this.Outputs = outputs;
+ this.DoReverseLookup = doReverseLookup;
+ }
+
+ internal static string InputToString(int inputMode, Int32sRef term)
+ {
+ return InputToString(inputMode, term, true);
+ }
+
+ internal static string InputToString(int inputMode, Int32sRef term, bool isValidUnicode)
+ {
+ if (!isValidUnicode)
+ {
+ return term.ToString();
+ }
+ else if (inputMode == 0)
+ {
+ // utf8
+ return ToBytesRef(term).Utf8ToString() + " " + term;
+ }
+ else
+ {
+ // utf32
+ return UnicodeUtil.NewString(term.Int32s, term.Offset, term.Length) + " " + term;
+ }
+ }
+
+ private static BytesRef ToBytesRef(Int32sRef ir)
+ {
+ BytesRef br = new BytesRef(ir.Length);
+ for (int i = 0; i < ir.Length; i++)
+ {
+ int x = ir.Int32s[ir.Offset + i];
+ Debug.Assert(x >= 0 && x <= 255);
+ br.Bytes[i] = (byte)x;
+ }
+ br.Length = ir.Length;
+ return br;
+ }
+
+ internal static string GetRandomString(Random random)
+ {
+ string term;
+ if (random.NextBoolean())
+ {
+ term = TestUtil.RandomRealisticUnicodeString(random);
+ }
+ else
+ {
+ // we want to mix in limited-alphabet symbols so
+ // we get more sharing of the nodes given how few
+ // terms we are testing...
+ term = SimpleRandomString(random);
+ }
+ return term;
+ }
+
+ internal static string SimpleRandomString(Random r)
+ {
+ int end = r.Next(10);
+ if (end == 0)
+ {
+ // allow 0 length
+ return "";
+ }
+ char[] buffer = new char[end];
+ for (int i = 0; i < end; i++)
+ {
+ buffer[i] = (char)TestUtil.NextInt(r, 97, 102);
+ }
+ return new string(buffer, 0, end);
+ }
+
+ internal static Int32sRef ToIntsRef(string s, int inputMode)
+ {
+ return ToIntsRef(s, inputMode, new Int32sRef(10));
+ }
+
+ internal static Int32sRef ToIntsRef(string s, int inputMode, Int32sRef ir)
+ {
+ if (inputMode == 0)
+ {
+ // utf8
+ return ToIntsRef(new BytesRef(s), ir);
+ }
+ else
+ {
+ // utf32
+ return ToIntsRefUTF32(s, ir);
+ }
+ }
+
+ internal static Int32sRef ToIntsRefUTF32(string s, Int32sRef ir)
+ {
+ int charLength = s.Length;
+ int charIdx = 0;
+ int intIdx = 0;
+ while (charIdx < charLength)
+ {
+ if (intIdx == ir.Int32s.Length)
+ {
+ ir.Grow(intIdx + 1);
+ }
+ int utf32 = Character.CodePointAt(s, charIdx);
+ ir.Int32s[intIdx] = utf32;
+ charIdx += Character.CharCount(utf32);
+ intIdx++;
+ }
+ ir.Length = intIdx;
+ return ir;
+ }
+
+ internal static Int32sRef ToIntsRef(BytesRef br, Int32sRef ir)
+ {
+ if (br.Length > ir.Int32s.Length)
+ {
+ ir.Grow(br.Length);
+ }
+ for (int i = 0; i < br.Length; i++)
+ {
+ ir.Int32s[i] = br.Bytes[br.Offset + i] & 0xFF;
+ }
+ ir.Length = br.Length;
+ return ir;
+ }
+
+ /// <summary>
+ /// Holds one input/output pair. </summary>
+ public class InputOutput<T1> : IComparable<InputOutput<T1>>
+ {
+ public readonly Int32sRef Input;
+ public readonly T1 Output;
+
+ public InputOutput(Int32sRef input, T1 output)
+ {
+ this.Input = input;
+ this.Output = output;
+ }
+
+ public virtual int CompareTo(InputOutput<T1> other)
+ {
+ return this.Input.CompareTo(other.Input);
+ }
+ }
+
+ public virtual void DoTest(bool testPruning)
+ {
+ // no pruning
+ DoTest(0, 0, true);
+
+ if (testPruning)
+ {
+ // simple pruning
+ DoTest(TestUtil.NextInt(Random, 1, 1 + Pairs.Count), 0, true);
+
+ // leafy pruning
+ DoTest(0, TestUtil.NextInt(Random, 1, 1 + Pairs.Count), true);
+ }
+ }
+
+ // runs the term, returning the output, or null if term
+ // isn't accepted. if prefixLength is non-null it must be
+ // length 1 int array; prefixLength[0] is set to the length
+ // of the term prefix that matches
+ private T Run(FST<T> fst, Int32sRef term, int[] prefixLength)
+ {
+ Debug.Assert(prefixLength == null || prefixLength.Length == 1);
+ FST.Arc<T> arc = fst.GetFirstArc(new FST.Arc<T>());
+ T NO_OUTPUT = fst.Outputs.NoOutput;
+ T output = NO_OUTPUT;
+ FST.BytesReader fstReader = fst.GetBytesReader();
+
+ for (int i = 0; i <= term.Length; i++)
+ {
+ int label;
+ if (i == term.Length)
+ {
+ label = FST.END_LABEL;
+ }
+ else
+ {
+ label = term.Int32s[term.Offset + i];
+ }
+ // System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.Outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
+ if (fst.FindTargetArc(label, arc, arc, fstReader) == null)
+ {
+ // System.out.println(" not found");
+ if (prefixLength != null)
+ {
+ prefixLength[0] = i;
+ return output;
+ }
+ else
+ {
+ return default(T);
+ }
+ }
+ output = fst.Outputs.Add(output, arc.Output);
+ }
+
+ if (prefixLength != null)
+ {
+ prefixLength[0] = term.Length;
+ }
+
+ return output;
+ }
+
+ private T RandomAcceptedWord(FST<T> fst, Int32sRef @in)
+ {
+ FST.Arc<T> arc = fst.GetFirstArc(new FST.Arc<T>());
+
+ IList<FST.Arc<T>> arcs = new List<FST.Arc<T>>();
+ @in.Length = 0;
+ @in.Offset = 0;
+ T NO_OUTPUT = fst.Outputs.NoOutput;
+ T output = NO_OUTPUT;
+ FST.BytesReader fstReader = fst.GetBytesReader();
+
+ while (true)
+ {
+ // read all arcs:
+ fst.ReadFirstTargetArc(arc, arc, fstReader);
+ arcs.Add((new FST.Arc<T>()).CopyFrom(arc));
+ while (!arc.IsLast)
+ {
+ fst.ReadNextArc(arc, fstReader);
+ arcs.Add((new FST.Arc<T>()).CopyFrom(arc));
+ }
+
+ // pick one
+ arc = arcs[Random.Next(arcs.Count)];
+ arcs.Clear();
+
+ // accumulate output
+ output = fst.Outputs.Add(output, arc.Output);
+
+ // append label
+ if (arc.Label == FST.END_LABEL)
+ {
+ break;
+ }
+
+ if (@in.Int32s.Length == @in.Length)
+ {
+ @in.Grow(1 + @in.Length);
+ }
+ @in.Int32s[@in.Length++] = arc.Label;
+ }
+
+ return output;
+ }
+
+ internal virtual FST<T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
+ }
+
+ bool willRewrite = Random.NextBoolean();
+
+ Builder<T> builder = new Builder<T>(InputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
+ prune1, prune2,
+ prune1 == 0 && prune2 == 0,
+ allowRandomSuffixSharing ? Random.NextBoolean() : true,
+ allowRandomSuffixSharing ? TestUtil.NextInt(Random, 1, 10) : int.MaxValue,
+ Outputs,
+ null,
+ willRewrite,
+ PackedInt32s.DEFAULT,
+ true,
+ 15);
+ if (LuceneTestCase.VERBOSE)
+ {
+ if (willRewrite)
+ {
+ Console.WriteLine("TEST: packed FST");
+ }
+ else
+ {
+ Console.WriteLine("TEST: non-packed FST");
+ }
+ }
+
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ if (pair.Output is IEnumerable)
+ {
+ Builder<object> builderObject = builder as Builder<object>;
+ var values = pair.Output as IEnumerable;
+ foreach (object value in values)
+ {
+ builderObject.Add(pair.Input, value);
+ }
+ }
+ else
+ {
+ builder.Add(pair.Input, pair.Output);
+ }
+ }
+ FST<T> fst = builder.Finish();
+
+ if (Random.NextBoolean() && fst != null && !willRewrite)
+ {
+ IOContext context = LuceneTestCase.NewIOContext(Random);
+ using (IndexOutput @out = Dir.CreateOutput("fst.bin", context))
+ {
+ fst.Save(@out);
+ }
+ IndexInput @in = Dir.OpenInput("fst.bin", context);
+ try
+ {
+ fst = new FST<T>(@in, Outputs);
+ }
+ finally
+ {
+ @in.Dispose();
+ Dir.DeleteFile("fst.bin");
+ }
+ }
+
+ if (LuceneTestCase.VERBOSE && Pairs.Count <= 20 && fst != null)
+ {
+ using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
+ {
+ Util.ToDot(fst, w, false, false);
+ }
+ Console.WriteLine("SAVED out.dot");
+ }
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ if (fst == null)
+ {
+ Console.WriteLine(" fst has 0 nodes (fully pruned)");
+ }
+ else
+ {
+ Console.WriteLine(" fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
+ }
+ }
+
+ if (prune1 == 0 && prune2 == 0)
+ {
+ VerifyUnPruned(InputMode, fst);
+ }
+ else
+ {
+ VerifyPruned(InputMode, fst, prune1, prune2);
+ }
+
+ return fst;
+ }
+
+ protected internal virtual bool OutputsEqual(T a, T b)
+ {
+ // LUCENENET: In .NET, IEnumerables do not automatically test to ensure
+ // their values are equal, so we need to do that manually.
+ // Note that we are testing the values without regard to whether
+ // the enumerable type is nullable.
+ return a.ValueEquals(b);
+ }
+
+ // FST is complete
+ private void VerifyUnPruned(int inputMode, FST<T> fst)
+ {
+ FST<long?> fstLong;
+ ISet<long?> validOutputs;
+ long minLong = long.MaxValue;
+ long maxLong = long.MinValue;
+
+ if (DoReverseLookup)
+ {
+ FST<long?> fstLong0 = fst as FST<long?>;
+ fstLong = fstLong0;
+ validOutputs = new HashSet<long?>();
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ long? output = pair.Output as long?;
+ maxLong = Math.Max(maxLong, output.Value);
+ minLong = Math.Min(minLong, output.Value);
+ validOutputs.Add(output.Value);
+ }
+ }
+ else
+ {
+ fstLong = null;
+ validOutputs = null;
+ }
+
+ if (Pairs.Count == 0)
+ {
+ Assert.IsNull(fst);
+ return;
+ }
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: now verify " + Pairs.Count + " terms");
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ Assert.IsNotNull(pair);
+ Assert.IsNotNull(pair.Input);
+ Assert.IsNotNull(pair.Output);
+ Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output));
+ }
+ }
+
+ Assert.IsNotNull(fst);
+
+ // visit valid pairs in order -- make sure all words
+ // are accepted, and FSTEnum's next() steps through
+ // them correctly
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: check valid terms/next()");
+ }
+ {
+ Int32sRefFSTEnum<T> fstEnum = new Int32sRefFSTEnum<T>(fst);
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ Int32sRef term = pair.Input;
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output));
+ }
+ T output = Run(fst, term, null);
+ Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted");
+ Assert.IsTrue(OutputsEqual(pair.Output, output));
+
+ // verify enum's next
+ Int32sRefFSTEnum.InputOutput<T> t = fstEnum.Next();
+ Assert.IsNotNull(t);
+ Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input));
+ Assert.IsTrue(OutputsEqual(pair.Output, t.Output));
+ }
+ Assert.IsNull(fstEnum.Next());
+ }
+
+ IDictionary<Int32sRef, T> termsMap = new Dictionary<Int32sRef, T>();
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ termsMap[pair.Input] = pair.Output;
+ }
+
+ if (DoReverseLookup && maxLong > minLong)
+ {
+ // Do random lookups so we test null (output doesn't
+ // exist) case:
+ Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7));
+ Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7));
+
+ int num = LuceneTestCase.AtLeast(Random, 100);
+ for (int iter = 0; iter < num; iter++)
+ {
+ long v = TestUtil.NextLong(Random, minLong, maxLong);
+ Int32sRef input = Util.GetByOutput(fstLong, v);
+ Assert.IsTrue(validOutputs.Contains(v) || input == null);
+ }
+ }
+
+ // find random matching word and make sure it's valid
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: verify random accepted terms");
+ }
+ Int32sRef scratch = new Int32sRef(10);
+ int num_ = LuceneTestCase.AtLeast(Random, 500);
+ for (int iter = 0; iter < num_; iter++)
+ {
+ T output = RandomAcceptedWord(fst, scratch);
+ Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid");
+ Assert.IsTrue(OutputsEqual(termsMap[scratch], output));
+
+ if (DoReverseLookup)
+ {
+ //System.out.println("lookup output=" + output + " outs=" + fst.Outputs);
+ Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value);
+ Assert.IsNotNull(input);
+ //System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
+ Assert.AreEqual(scratch, input);
+ }
+ }
+
+ // test IntsRefFSTEnum.Seek:
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: verify seek");
+ }
+ Int32sRefFSTEnum<T> fstEnum_ = new Int32sRefFSTEnum<T>(fst);
+ num_ = LuceneTestCase.AtLeast(Random, 100);
+ for (int iter = 0; iter < num_; iter++)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" iter=" + iter);
+ }
+ if (Random.NextBoolean())
+ {
+ // seek to term that doesn't exist:
+ while (true)
+ {
+ Int32sRef term = ToIntsRef(GetRandomString(Random), inputMode);
+ int pos = Pairs.BinarySearch(new InputOutput<T>(term, default(T)));
+ if (pos < 0)
+ {
+ pos = -(pos + 1);
+ // ok doesn't exist
+ //System.out.println(" seek " + inputToString(inputMode, term));
+ Int32sRefFSTEnum.InputOutput<T> seekResult;
+ if (Random.Next(3) == 0)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do non-exist seekExact term=" + InputToString(inputMode, term));
+ }
+ seekResult = fstEnum_.SeekExact(term);
+ pos = -1;
+ }
+ else if (Random.NextBoolean())
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do non-exist seekFloor term=" + InputToString(inputMode, term));
+ }
+ seekResult = fstEnum_.SeekFloor(term);
+ pos--;
+ }
+ else
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do non-exist seekCeil term=" + InputToString(inputMode, term));
+ }
+ seekResult = fstEnum_.SeekCeil(term);
+ }
+
+ if (pos != -1 && pos < Pairs.Count)
+ {
+ //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output));
+ Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, Pairs[pos].Input));
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" got " + InputToString(inputMode, seekResult.Input));
+ }
+ Assert.AreEqual(Pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, Pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input));
+ Assert.IsTrue(OutputsEqual(Pairs[pos].Output, seekResult.Output));
+ }
+ else
+ {
+ // seeked before start or beyond end
+ //System.out.println("seek=" + seekTerm);
+ Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? "null" : InputToString(inputMode, seekResult.Input)));
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" got null");
+ }
+ }
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ // seek to term that does exist:
+ InputOutput<T> pair = Pairs[Random.Next(Pairs.Count)];
+ Int32sRefFSTEnum.InputOutput<T> seekResult;
+ if (Random.Next(3) == 2)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do exists seekExact term=" + InputToString(inputMode, pair.Input));
+ }
+ seekResult = fstEnum_.SeekExact(pair.Input);
+ }
+ else if (Random.NextBoolean())
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do exists seekFloor " + InputToString(inputMode, pair.Input));
+ }
+ seekResult = fstEnum_.SeekFloor(pair.Input);
+ }
+ else
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do exists seekCeil " + InputToString(inputMode, pair.Input));
+ }
+ seekResult = fstEnum_.SeekCeil(pair.Input);
+ }
+ Assert.IsNotNull(seekResult);
+ Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input));
+ Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output));
+ }
+ }
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: mixed next/seek");
+ }
+
+ // test mixed next/seek
+ num_ = LuceneTestCase.AtLeast(Random, 100);
+ for (int iter = 0; iter < num_; iter++)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: iter " + iter);
+ }
+ // reset:
+ fstEnum_ = new Int32sRefFSTEnum<T>(fst);
+ int upto = -1;
+ while (true)
+ {
+ bool isDone = false;
+ if (upto == Pairs.Count - 1 || Random.NextBoolean())
+ {
+ // next
+ upto++;
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do next");
+ }
+ isDone = fstEnum_.Next() == null;
+ }
+ else if (upto != -1 && upto < 0.75 * Pairs.Count && Random.NextBoolean())
+ {
+ int attempt = 0;
+ for (; attempt < 10; attempt++)
+ {
+ Int32sRef term = ToIntsRef(GetRandomString(Random), inputMode);
+ if (!termsMap.ContainsKey(term) && term.CompareTo(Pairs[upto].Input) > 0)
+ {
+ int pos = Pairs.BinarySearch(new InputOutput<T>(term, default(T)));
+ Debug.Assert(pos < 0);
+ upto = -(pos + 1);
+
+ if (Random.NextBoolean())
+ {
+ upto--;
+ Assert.IsTrue(upto != -1);
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do non-exist seekFloor(" + InputToString(inputMode, term) + ")");
+ }
+ isDone = fstEnum_.SeekFloor(term) == null;
+ }
+ else
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do non-exist seekCeil(" + InputToString(inputMode, term) + ")");
+ }
+ isDone = fstEnum_.SeekCeil(term) == null;
+ }
+
+ break;
+ }
+ }
+ if (attempt == 10)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ int inc = Random.Next(Pairs.Count - upto - 1);
+ upto += inc;
+ if (upto == -1)
+ {
+ upto = 0;
+ }
+
+ if (Random.NextBoolean())
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do seekCeil(" + InputToString(inputMode, Pairs[upto].Input) + ")");
+ }
+ isDone = fstEnum_.SeekCeil(Pairs[upto].Input) == null;
+ }
+ else
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" do seekFloor(" + InputToString(inputMode, Pairs[upto].Input) + ")");
+ }
+ isDone = fstEnum_.SeekFloor(Pairs[upto].Input) == null;
+ }
+ }
+ if (LuceneTestCase.VERBOSE)
+ {
+ if (!isDone)
+ {
+ Console.WriteLine(" got " + InputToString(inputMode, fstEnum_.Current.Input));
+ }
+ else
+ {
+ Console.WriteLine(" got null");
+ }
+ }
+
+ if (upto == Pairs.Count)
+ {
+ Assert.IsTrue(isDone);
+ break;
+ }
+ else
+ {
+ Assert.IsFalse(isDone);
+ Assert.AreEqual(Pairs[upto].Input, fstEnum_.Current.Input);
+ Assert.IsTrue(OutputsEqual(Pairs[upto].Output, fstEnum_.Current.Output));
+
+ /*
+ if (upto < pairs.size()-1) {
+ int tryCount = 0;
+ while(tryCount < 10) {
+ final IntsRef t = toIntsRef(getRandomString(), inputMode);
+ if (pairs.get(upto).input.compareTo(t) < 0) {
+ final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
+ }
+ Assert.AreEqual(expected, fstEnum.beforeNext(t));
+ break;
+ }
+ tryCount++;
+ }
+ }
+ */
+ }
+ }
+ }
+ }
+
+ private class CountMinOutput<S>
+ {
+ internal int Count;
+ internal S Output;
+ internal S FinalOutput;
+ internal bool IsLeaf = true;
+ internal bool IsFinal;
+ }
+
+ // FST is pruned
+ private void VerifyPruned(int inputMode, FST<T> fst, int prune1, int prune2)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: now verify pruned " + Pairs.Count + " terms; outputs=" + Outputs);
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output));
+ }
+ }
+
+ // To validate the FST, we brute-force compute all prefixes
+ // in the terms, matched to their "common" outputs, prune that
+ // set according to the prune thresholds, then assert the FST
+ // matches that same set.
+
+ // NOTE: Crazy RAM intensive!!
+
+ //System.out.println("TEST: tally prefixes");
+
+ // build all prefixes
+ IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new HashMap<Int32sRef, CountMinOutput<T>>();
+ Int32sRef scratch = new Int32sRef(10);
+ foreach (InputOutput<T> pair in Pairs)
+ {
+ scratch.CopyInt32s(pair.Input);
+ for (int idx = 0; idx <= pair.Input.Length; idx++)
+ {
+ scratch.Length = idx;
+ CountMinOutput<T> cmo = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
+ if (cmo == null)
+ {
+ cmo = new CountMinOutput<T>();
+ cmo.Count = 1;
+ cmo.Output = pair.Output;
+ prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo;
+ }
+ else
+ {
+ cmo.Count++;
+ T output1 = cmo.Output;
+ if (output1.Equals(Outputs.NoOutput))
+ {
+ output1 = Outputs.NoOutput;
+ }
+ T output2 = pair.Output;
+ if (output2.Equals(Outputs.NoOutput))
+ {
+ output2 = Outputs.NoOutput;
+ }
+ cmo.Output = Outputs.Common(output1, output2);
+ }
+ if (idx == pair.Input.Length)
+ {
+ cmo.IsFinal = true;
+ cmo.FinalOutput = cmo.Output;
+ }
+ }
+ }
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: now prune");
+ }
+
+
+ // prune 'em
+ // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since
+ // in .NET you cannot delete records while enumerating forward through a dictionary.
+ for (int i = prefixes.Count - 1; i >= 0; i--)
+ {
+ KeyValuePair<Int32sRef, CountMinOutput<T>> ent = prefixes.ElementAt(i);
+ Int32sRef prefix = ent.Key;
+ CountMinOutput<T> cmo = ent.Value;
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + Outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
+ }
+ bool keep;
+ if (prune1 > 0)
+ {
+ keep = cmo.Count >= prune1;
+ }
+ else
+ {
+ Debug.Assert(prune2 > 0);
+ if (prune2 > 1 && cmo.Count >= prune2)
+ {
+ keep = true;
+ }
+ else if (prefix.Length > 0)
+ {
+ // consult our parent
+ scratch.Length = prefix.Length - 1;
+ Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length);
+ CountMinOutput<T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
+ //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count));
+ keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
+ }
+ else if (cmo.Count >= prune2)
+ {
+ keep = true;
+ }
+ else
+ {
+ keep = false;
+ }
+ }
+
+ if (!keep)
+ {
+ prefixes.Remove(prefix);
+ //System.out.println(" remove");
+ }
+ else
+ {
+ // clear isLeaf for all ancestors
+ //System.out.println(" keep");
+ scratch.CopyInt32s(prefix);
+ scratch.Length--;
+ while (scratch.Length >= 0)
+ {
+ CountMinOutput<T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
+ if (cmo2 != null)
+ {
+ //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch));
+ cmo2.IsLeaf = false;
+ }
+ scratch.Length--;
+ }
+ }
+ }
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: after prune");
+ foreach (KeyValuePair<Int32sRef, CountMinOutput<T>> ent in prefixes)
+ {
+ Console.WriteLine(" " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
+ if (ent.Value.IsFinal)
+ {
+ Console.WriteLine(" finalOutput=" + Outputs.OutputToString(ent.Value.FinalOutput));
+ }
+ }
+ }
+
+ if (prefixes.Count <= 1)
+ {
+ Assert.IsNull(fst);
+ return;
+ }
+
+ Assert.IsNotNull(fst);
+
+ // make sure FST only enums valid prefixes
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: check pruned enum");
+ }
+ Int32sRefFSTEnum<T> fstEnum = new Int32sRefFSTEnum<T>(fst);
+ Int32sRefFSTEnum.InputOutput<T> current;
+ while ((current = fstEnum.Next()) != null)
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine(" fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + Outputs.OutputToString(current.Output));
+ }
+ CountMinOutput<T> cmo = prefixes.ContainsKey(current.Input) ? prefixes[current.Input] : null;
+ Assert.IsNotNull(cmo);
+ Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
+ //if (cmo.isFinal && !cmo.isLeaf) {
+ if (cmo.IsFinal)
+ {
+ Assert.AreEqual(cmo.FinalOutput, current.Output);
+ }
+ else
+ {
+ Assert.AreEqual(cmo.Output, current.Output);
+ }
+ }
+
+ // make sure all non-pruned prefixes are present in the FST
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: verify all prefixes");
+ }
+ int[] stopNode = new int[1];
+ foreach (KeyValuePair<Int32sRef, CountMinOutput<T>> ent in prefixes)
+ {
+ if (ent.Key.Length > 0)
+ {
+ CountMinOutput<T> cmo = ent.Value;
+ T output = Run(fst, ent.Key, stopNode);
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + Outputs.OutputToString(cmo.Output));
+ }
+ // if (cmo.isFinal && !cmo.isLeaf) {
+ if (cmo.IsFinal)
+ {
+ Assert.AreEqual(cmo.FinalOutput, output);
+ }
+ else
+ {
+ Assert.AreEqual(cmo.Output, output);
+ }
+ Assert.AreEqual(ent.Key.Length, stopNode[0]);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/84ad7a30/src/Lucene.Net.TestFramework/Util/fst/FSTTester.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/fst/FSTTester.cs b/src/Lucene.Net.TestFramework/Util/fst/FSTTester.cs
deleted file mode 100644
index 46c6f61..0000000
--- a/src/Lucene.Net.TestFramework/Util/fst/FSTTester.cs
+++ /dev/null
@@ -1,1013 +0,0 @@
-using Lucene.Net.Randomized.Generators;
-using Lucene.Net.Support;
-using NUnit.Framework;
-using System;
-using System.Collections;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.IO;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net.Util.Fst
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using Directory = Lucene.Net.Store.Directory;
- using IndexInput = Lucene.Net.Store.IndexInput;
- using IndexOutput = Lucene.Net.Store.IndexOutput;
- using IOContext = Lucene.Net.Store.IOContext;
- using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s;
-
- /// <summary>
- /// Helper class to test FSTs. </summary>
- public class FSTTester<T>
- {
- internal readonly Random Random;
- internal readonly List<InputOutput<T>> Pairs;
- internal readonly int InputMode;
- internal readonly Outputs<T> Outputs;
- internal readonly Directory Dir;
- internal readonly bool DoReverseLookup;
-
- public FSTTester(Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs, bool doReverseLookup)
- {
- this.Random = random;
- this.Dir = dir;
- this.InputMode = inputMode;
- this.Pairs = pairs;
- this.Outputs = outputs;
- this.DoReverseLookup = doReverseLookup;
- }
-
- internal static string InputToString(int inputMode, Int32sRef term)
- {
- return InputToString(inputMode, term, true);
- }
-
- internal static string InputToString(int inputMode, Int32sRef term, bool isValidUnicode)
- {
- if (!isValidUnicode)
- {
- return term.ToString();
- }
- else if (inputMode == 0)
- {
- // utf8
- return ToBytesRef(term).Utf8ToString() + " " + term;
- }
- else
- {
- // utf32
- return UnicodeUtil.NewString(term.Int32s, term.Offset, term.Length) + " " + term;
- }
- }
-
- private static BytesRef ToBytesRef(Int32sRef ir)
- {
- BytesRef br = new BytesRef(ir.Length);
- for (int i = 0; i < ir.Length; i++)
- {
- int x = ir.Int32s[ir.Offset + i];
- Debug.Assert(x >= 0 && x <= 255);
- br.Bytes[i] = (byte)x;
- }
- br.Length = ir.Length;
- return br;
- }
-
- internal static string GetRandomString(Random random)
- {
- string term;
- if (random.NextBoolean())
- {
- term = TestUtil.RandomRealisticUnicodeString(random);
- }
- else
- {
- // we want to mix in limited-alphabet symbols so
- // we get more sharing of the nodes given how few
- // terms we are testing...
- term = SimpleRandomString(random);
- }
- return term;
- }
-
- internal static string SimpleRandomString(Random r)
- {
- int end = r.Next(10);
- if (end == 0)
- {
- // allow 0 length
- return "";
- }
- char[] buffer = new char[end];
- for (int i = 0; i < end; i++)
- {
- buffer[i] = (char)TestUtil.NextInt(r, 97, 102);
- }
- return new string(buffer, 0, end);
- }
-
- internal static Int32sRef ToIntsRef(string s, int inputMode)
- {
- return ToIntsRef(s, inputMode, new Int32sRef(10));
- }
-
- internal static Int32sRef ToIntsRef(string s, int inputMode, Int32sRef ir)
- {
- if (inputMode == 0)
- {
- // utf8
- return ToIntsRef(new BytesRef(s), ir);
- }
- else
- {
- // utf32
- return ToIntsRefUTF32(s, ir);
- }
- }
-
- internal static Int32sRef ToIntsRefUTF32(string s, Int32sRef ir)
- {
- int charLength = s.Length;
- int charIdx = 0;
- int intIdx = 0;
- while (charIdx < charLength)
- {
- if (intIdx == ir.Int32s.Length)
- {
- ir.Grow(intIdx + 1);
- }
- int utf32 = Character.CodePointAt(s, charIdx);
- ir.Int32s[intIdx] = utf32;
- charIdx += Character.CharCount(utf32);
- intIdx++;
- }
- ir.Length = intIdx;
- return ir;
- }
-
- internal static Int32sRef ToIntsRef(BytesRef br, Int32sRef ir)
- {
- if (br.Length > ir.Int32s.Length)
- {
- ir.Grow(br.Length);
- }
- for (int i = 0; i < br.Length; i++)
- {
- ir.Int32s[i] = br.Bytes[br.Offset + i] & 0xFF;
- }
- ir.Length = br.Length;
- return ir;
- }
-
- /// <summary>
- /// Holds one input/output pair. </summary>
- public class InputOutput<T1> : IComparable<InputOutput<T1>>
- {
- public readonly Int32sRef Input;
- public readonly T1 Output;
-
- public InputOutput(Int32sRef input, T1 output)
- {
- this.Input = input;
- this.Output = output;
- }
-
- public virtual int CompareTo(InputOutput<T1> other)
- {
- return this.Input.CompareTo(other.Input);
- }
- }
-
- public virtual void DoTest(bool testPruning)
- {
- // no pruning
- DoTest(0, 0, true);
-
- if (testPruning)
- {
- // simple pruning
- DoTest(TestUtil.NextInt(Random, 1, 1 + Pairs.Count), 0, true);
-
- // leafy pruning
- DoTest(0, TestUtil.NextInt(Random, 1, 1 + Pairs.Count), true);
- }
- }
-
- // runs the term, returning the output, or null if term
- // isn't accepted. if prefixLength is non-null it must be
- // length 1 int array; prefixLength[0] is set to the length
- // of the term prefix that matches
- private T Run(FST<T> fst, Int32sRef term, int[] prefixLength)
- {
- Debug.Assert(prefixLength == null || prefixLength.Length == 1);
- FST.Arc<T> arc = fst.GetFirstArc(new FST.Arc<T>());
- T NO_OUTPUT = fst.Outputs.NoOutput;
- T output = NO_OUTPUT;
- FST.BytesReader fstReader = fst.GetBytesReader();
-
- for (int i = 0; i <= term.Length; i++)
- {
- int label;
- if (i == term.Length)
- {
- label = FST.END_LABEL;
- }
- else
- {
- label = term.Int32s[term.Offset + i];
- }
- // System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.Outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
- if (fst.FindTargetArc(label, arc, arc, fstReader) == null)
- {
- // System.out.println(" not found");
- if (prefixLength != null)
- {
- prefixLength[0] = i;
- return output;
- }
- else
- {
- return default(T);
- }
- }
- output = fst.Outputs.Add(output, arc.Output);
- }
-
- if (prefixLength != null)
- {
- prefixLength[0] = term.Length;
- }
-
- return output;
- }
-
- private T RandomAcceptedWord(FST<T> fst, Int32sRef @in)
- {
- FST.Arc<T> arc = fst.GetFirstArc(new FST.Arc<T>());
-
- IList<FST.Arc<T>> arcs = new List<FST.Arc<T>>();
- @in.Length = 0;
- @in.Offset = 0;
- T NO_OUTPUT = fst.Outputs.NoOutput;
- T output = NO_OUTPUT;
- FST.BytesReader fstReader = fst.GetBytesReader();
-
- while (true)
- {
- // read all arcs:
- fst.ReadFirstTargetArc(arc, arc, fstReader);
- arcs.Add((new FST.Arc<T>()).CopyFrom(arc));
- while (!arc.IsLast)
- {
- fst.ReadNextArc(arc, fstReader);
- arcs.Add((new FST.Arc<T>()).CopyFrom(arc));
- }
-
- // pick one
- arc = arcs[Random.Next(arcs.Count)];
- arcs.Clear();
-
- // accumulate output
- output = fst.Outputs.Add(output, arc.Output);
-
- // append label
- if (arc.Label == FST.END_LABEL)
- {
- break;
- }
-
- if (@in.Int32s.Length == @in.Length)
- {
- @in.Grow(1 + @in.Length);
- }
- @in.Int32s[@in.Length++] = arc.Label;
- }
-
- return output;
- }
-
- internal virtual FST<T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
- }
-
- bool willRewrite = Random.NextBoolean();
-
- Builder<T> builder = new Builder<T>(InputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
- prune1, prune2,
- prune1 == 0 && prune2 == 0,
- allowRandomSuffixSharing ? Random.NextBoolean() : true,
- allowRandomSuffixSharing ? TestUtil.NextInt(Random, 1, 10) : int.MaxValue,
- Outputs,
- null,
- willRewrite,
- PackedInt32s.DEFAULT,
- true,
- 15);
- if (LuceneTestCase.VERBOSE)
- {
- if (willRewrite)
- {
- Console.WriteLine("TEST: packed FST");
- }
- else
- {
- Console.WriteLine("TEST: non-packed FST");
- }
- }
-
- foreach (InputOutput<T> pair in Pairs)
- {
- if (pair.Output is IEnumerable)
- {
- Builder<object> builderObject = builder as Builder<object>;
- var values = pair.Output as IEnumerable;
- foreach (object value in values)
- {
- builderObject.Add(pair.Input, value);
- }
- }
- else
- {
- builder.Add(pair.Input, pair.Output);
- }
- }
- FST<T> fst = builder.Finish();
-
- if (Random.NextBoolean() && fst != null && !willRewrite)
- {
- IOContext context = LuceneTestCase.NewIOContext(Random);
- using (IndexOutput @out = Dir.CreateOutput("fst.bin", context))
- {
- fst.Save(@out);
- }
- IndexInput @in = Dir.OpenInput("fst.bin", context);
- try
- {
- fst = new FST<T>(@in, Outputs);
- }
- finally
- {
- @in.Dispose();
- Dir.DeleteFile("fst.bin");
- }
- }
-
- if (LuceneTestCase.VERBOSE && Pairs.Count <= 20 && fst != null)
- {
- using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
- {
- Util.ToDot(fst, w, false, false);
- }
- Console.WriteLine("SAVED out.dot");
- }
-
- if (LuceneTestCase.VERBOSE)
- {
- if (fst == null)
- {
- Console.WriteLine(" fst has 0 nodes (fully pruned)");
- }
- else
- {
- Console.WriteLine(" fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
- }
- }
-
- if (prune1 == 0 && prune2 == 0)
- {
- VerifyUnPruned(InputMode, fst);
- }
- else
- {
- VerifyPruned(InputMode, fst, prune1, prune2);
- }
-
- return fst;
- }
-
- protected internal virtual bool OutputsEqual(T a, T b)
- {
- // LUCENENET: In .NET, IEnumerables do not automatically test to ensure
- // their values are equal, so we need to do that manually.
- // Note that we are testing the values without regard to whether
- // the enumerable type is nullable.
- return a.ValueEquals(b);
- }
-
- // FST is complete
- private void VerifyUnPruned(int inputMode, FST<T> fst)
- {
- FST<long?> fstLong;
- ISet<long?> validOutputs;
- long minLong = long.MaxValue;
- long maxLong = long.MinValue;
-
- if (DoReverseLookup)
- {
- FST<long?> fstLong0 = fst as FST<long?>;
- fstLong = fstLong0;
- validOutputs = new HashSet<long?>();
- foreach (InputOutput<T> pair in Pairs)
- {
- long? output = pair.Output as long?;
- maxLong = Math.Max(maxLong, output.Value);
- minLong = Math.Min(minLong, output.Value);
- validOutputs.Add(output.Value);
- }
- }
- else
- {
- fstLong = null;
- validOutputs = null;
- }
-
- if (Pairs.Count == 0)
- {
- Assert.IsNull(fst);
- return;
- }
-
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: now verify " + Pairs.Count + " terms");
- foreach (InputOutput<T> pair in Pairs)
- {
- Assert.IsNotNull(pair);
- Assert.IsNotNull(pair.Input);
- Assert.IsNotNull(pair.Output);
- Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output));
- }
- }
-
- Assert.IsNotNull(fst);
-
- // visit valid pairs in order -- make sure all words
- // are accepted, and FSTEnum's next() steps through
- // them correctly
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: check valid terms/next()");
- }
- {
- Int32sRefFSTEnum<T> fstEnum = new Int32sRefFSTEnum<T>(fst);
- foreach (InputOutput<T> pair in Pairs)
- {
- Int32sRef term = pair.Input;
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output));
- }
- T output = Run(fst, term, null);
- Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted");
- Assert.IsTrue(OutputsEqual(pair.Output, output));
-
- // verify enum's next
- Int32sRefFSTEnum.InputOutput<T> t = fstEnum.Next();
- Assert.IsNotNull(t);
- Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input));
- Assert.IsTrue(OutputsEqual(pair.Output, t.Output));
- }
- Assert.IsNull(fstEnum.Next());
- }
-
- IDictionary<Int32sRef, T> termsMap = new Dictionary<Int32sRef, T>();
- foreach (InputOutput<T> pair in Pairs)
- {
- termsMap[pair.Input] = pair.Output;
- }
-
- if (DoReverseLookup && maxLong > minLong)
- {
- // Do random lookups so we test null (output doesn't
- // exist) case:
- Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7));
- Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7));
-
- int num = LuceneTestCase.AtLeast(Random, 100);
- for (int iter = 0; iter < num; iter++)
- {
- long v = TestUtil.NextLong(Random, minLong, maxLong);
- Int32sRef input = Util.GetByOutput(fstLong, v);
- Assert.IsTrue(validOutputs.Contains(v) || input == null);
- }
- }
-
- // find random matching word and make sure it's valid
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: verify random accepted terms");
- }
- Int32sRef scratch = new Int32sRef(10);
- int num_ = LuceneTestCase.AtLeast(Random, 500);
- for (int iter = 0; iter < num_; iter++)
- {
- T output = RandomAcceptedWord(fst, scratch);
- Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid");
- Assert.IsTrue(OutputsEqual(termsMap[scratch], output));
-
- if (DoReverseLookup)
- {
- //System.out.println("lookup output=" + output + " outs=" + fst.Outputs);
- Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value);
- Assert.IsNotNull(input);
- //System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
- Assert.AreEqual(scratch, input);
- }
- }
-
- // test IntsRefFSTEnum.Seek:
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: verify seek");
- }
- Int32sRefFSTEnum<T> fstEnum_ = new Int32sRefFSTEnum<T>(fst);
- num_ = LuceneTestCase.AtLeast(Random, 100);
- for (int iter = 0; iter < num_; iter++)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" iter=" + iter);
- }
- if (Random.NextBoolean())
- {
- // seek to term that doesn't exist:
- while (true)
- {
- Int32sRef term = ToIntsRef(GetRandomString(Random), inputMode);
- int pos = Pairs.BinarySearch(new InputOutput<T>(term, default(T)));
- if (pos < 0)
- {
- pos = -(pos + 1);
- // ok doesn't exist
- //System.out.println(" seek " + inputToString(inputMode, term));
- Int32sRefFSTEnum.InputOutput<T> seekResult;
- if (Random.Next(3) == 0)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do non-exist seekExact term=" + InputToString(inputMode, term));
- }
- seekResult = fstEnum_.SeekExact(term);
- pos = -1;
- }
- else if (Random.NextBoolean())
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do non-exist seekFloor term=" + InputToString(inputMode, term));
- }
- seekResult = fstEnum_.SeekFloor(term);
- pos--;
- }
- else
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do non-exist seekCeil term=" + InputToString(inputMode, term));
- }
- seekResult = fstEnum_.SeekCeil(term);
- }
-
- if (pos != -1 && pos < Pairs.Count)
- {
- //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output));
- Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, Pairs[pos].Input));
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" got " + InputToString(inputMode, seekResult.Input));
- }
- Assert.AreEqual(Pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, Pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input));
- Assert.IsTrue(OutputsEqual(Pairs[pos].Output, seekResult.Output));
- }
- else
- {
- // seeked before start or beyond end
- //System.out.println("seek=" + seekTerm);
- Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? "null" : InputToString(inputMode, seekResult.Input)));
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" got null");
- }
- }
-
- break;
- }
- }
- }
- else
- {
- // seek to term that does exist:
- InputOutput<T> pair = Pairs[Random.Next(Pairs.Count)];
- Int32sRefFSTEnum.InputOutput<T> seekResult;
- if (Random.Next(3) == 2)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do exists seekExact term=" + InputToString(inputMode, pair.Input));
- }
- seekResult = fstEnum_.SeekExact(pair.Input);
- }
- else if (Random.NextBoolean())
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do exists seekFloor " + InputToString(inputMode, pair.Input));
- }
- seekResult = fstEnum_.SeekFloor(pair.Input);
- }
- else
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do exists seekCeil " + InputToString(inputMode, pair.Input));
- }
- seekResult = fstEnum_.SeekCeil(pair.Input);
- }
- Assert.IsNotNull(seekResult);
- Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input));
- Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output));
- }
- }
-
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: mixed next/seek");
- }
-
- // test mixed next/seek
- num_ = LuceneTestCase.AtLeast(Random, 100);
- for (int iter = 0; iter < num_; iter++)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: iter " + iter);
- }
- // reset:
- fstEnum_ = new Int32sRefFSTEnum<T>(fst);
- int upto = -1;
- while (true)
- {
- bool isDone = false;
- if (upto == Pairs.Count - 1 || Random.NextBoolean())
- {
- // next
- upto++;
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do next");
- }
- isDone = fstEnum_.Next() == null;
- }
- else if (upto != -1 && upto < 0.75 * Pairs.Count && Random.NextBoolean())
- {
- int attempt = 0;
- for (; attempt < 10; attempt++)
- {
- Int32sRef term = ToIntsRef(GetRandomString(Random), inputMode);
- if (!termsMap.ContainsKey(term) && term.CompareTo(Pairs[upto].Input) > 0)
- {
- int pos = Pairs.BinarySearch(new InputOutput<T>(term, default(T)));
- Debug.Assert(pos < 0);
- upto = -(pos + 1);
-
- if (Random.NextBoolean())
- {
- upto--;
- Assert.IsTrue(upto != -1);
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do non-exist seekFloor(" + InputToString(inputMode, term) + ")");
- }
- isDone = fstEnum_.SeekFloor(term) == null;
- }
- else
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do non-exist seekCeil(" + InputToString(inputMode, term) + ")");
- }
- isDone = fstEnum_.SeekCeil(term) == null;
- }
-
- break;
- }
- }
- if (attempt == 10)
- {
- continue;
- }
- }
- else
- {
- int inc = Random.Next(Pairs.Count - upto - 1);
- upto += inc;
- if (upto == -1)
- {
- upto = 0;
- }
-
- if (Random.NextBoolean())
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do seekCeil(" + InputToString(inputMode, Pairs[upto].Input) + ")");
- }
- isDone = fstEnum_.SeekCeil(Pairs[upto].Input) == null;
- }
- else
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" do seekFloor(" + InputToString(inputMode, Pairs[upto].Input) + ")");
- }
- isDone = fstEnum_.SeekFloor(Pairs[upto].Input) == null;
- }
- }
- if (LuceneTestCase.VERBOSE)
- {
- if (!isDone)
- {
- Console.WriteLine(" got " + InputToString(inputMode, fstEnum_.Current.Input));
- }
- else
- {
- Console.WriteLine(" got null");
- }
- }
-
- if (upto == Pairs.Count)
- {
- Assert.IsTrue(isDone);
- break;
- }
- else
- {
- Assert.IsFalse(isDone);
- Assert.AreEqual(Pairs[upto].Input, fstEnum_.Current.Input);
- Assert.IsTrue(OutputsEqual(Pairs[upto].Output, fstEnum_.Current.Output));
-
- /*
- if (upto < pairs.size()-1) {
- int tryCount = 0;
- while(tryCount < 10) {
- final IntsRef t = toIntsRef(getRandomString(), inputMode);
- if (pairs.get(upto).input.compareTo(t) < 0) {
- final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
- if (LuceneTestCase.VERBOSE) {
- System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
- }
- Assert.AreEqual(expected, fstEnum.beforeNext(t));
- break;
- }
- tryCount++;
- }
- }
- */
- }
- }
- }
- }
-
- private class CountMinOutput<S>
- {
- internal int Count;
- internal S Output;
- internal S FinalOutput;
- internal bool IsLeaf = true;
- internal bool IsFinal;
- }
-
- // FST is pruned
- private void VerifyPruned(int inputMode, FST<T> fst, int prune1, int prune2)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: now verify pruned " + Pairs.Count + " terms; outputs=" + Outputs);
- foreach (InputOutput<T> pair in Pairs)
- {
- Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output));
- }
- }
-
- // To validate the FST, we brute-force compute all prefixes
- // in the terms, matched to their "common" outputs, prune that
- // set according to the prune thresholds, then assert the FST
- // matches that same set.
-
- // NOTE: Crazy RAM intensive!!
-
- //System.out.println("TEST: tally prefixes");
-
- // build all prefixes
- IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new HashMap<Int32sRef, CountMinOutput<T>>();
- Int32sRef scratch = new Int32sRef(10);
- foreach (InputOutput<T> pair in Pairs)
- {
- scratch.CopyInt32s(pair.Input);
- for (int idx = 0; idx <= pair.Input.Length; idx++)
- {
- scratch.Length = idx;
- CountMinOutput<T> cmo = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
- if (cmo == null)
- {
- cmo = new CountMinOutput<T>();
- cmo.Count = 1;
- cmo.Output = pair.Output;
- prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo;
- }
- else
- {
- cmo.Count++;
- T output1 = cmo.Output;
- if (output1.Equals(Outputs.NoOutput))
- {
- output1 = Outputs.NoOutput;
- }
- T output2 = pair.Output;
- if (output2.Equals(Outputs.NoOutput))
- {
- output2 = Outputs.NoOutput;
- }
- cmo.Output = Outputs.Common(output1, output2);
- }
- if (idx == pair.Input.Length)
- {
- cmo.IsFinal = true;
- cmo.FinalOutput = cmo.Output;
- }
- }
- }
-
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: now prune");
- }
-
-
- // prune 'em
- // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since
- // in .NET you cannot delete records while enumerating forward through a dictionary.
- for (int i = prefixes.Count - 1; i >= 0; i--)
- {
- KeyValuePair<Int32sRef, CountMinOutput<T>> ent = prefixes.ElementAt(i);
- Int32sRef prefix = ent.Key;
- CountMinOutput<T> cmo = ent.Value;
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + Outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
- }
- bool keep;
- if (prune1 > 0)
- {
- keep = cmo.Count >= prune1;
- }
- else
- {
- Debug.Assert(prune2 > 0);
- if (prune2 > 1 && cmo.Count >= prune2)
- {
- keep = true;
- }
- else if (prefix.Length > 0)
- {
- // consult our parent
- scratch.Length = prefix.Length - 1;
- Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length);
- CountMinOutput<T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
- //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count));
- keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
- }
- else if (cmo.Count >= prune2)
- {
- keep = true;
- }
- else
- {
- keep = false;
- }
- }
-
- if (!keep)
- {
- prefixes.Remove(prefix);
- //System.out.println(" remove");
- }
- else
- {
- // clear isLeaf for all ancestors
- //System.out.println(" keep");
- scratch.CopyInt32s(prefix);
- scratch.Length--;
- while (scratch.Length >= 0)
- {
- CountMinOutput<T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
- if (cmo2 != null)
- {
- //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch));
- cmo2.IsLeaf = false;
- }
- scratch.Length--;
- }
- }
- }
-
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: after prune");
- foreach (KeyValuePair<Int32sRef, CountMinOutput<T>> ent in prefixes)
- {
- Console.WriteLine(" " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
- if (ent.Value.IsFinal)
- {
- Console.WriteLine(" finalOutput=" + Outputs.OutputToString(ent.Value.FinalOutput));
- }
- }
- }
-
- if (prefixes.Count <= 1)
- {
- Assert.IsNull(fst);
- return;
- }
-
- Assert.IsNotNull(fst);
-
- // make sure FST only enums valid prefixes
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: check pruned enum");
- }
- Int32sRefFSTEnum<T> fstEnum = new Int32sRefFSTEnum<T>(fst);
- Int32sRefFSTEnum.InputOutput<T> current;
- while ((current = fstEnum.Next()) != null)
- {
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine(" fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + Outputs.OutputToString(current.Output));
- }
- CountMinOutput<T> cmo = prefixes.ContainsKey(current.Input) ? prefixes[current.Input] : null;
- Assert.IsNotNull(cmo);
- Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
- //if (cmo.isFinal && !cmo.isLeaf) {
- if (cmo.IsFinal)
- {
- Assert.AreEqual(cmo.FinalOutput, current.Output);
- }
- else
- {
- Assert.AreEqual(cmo.Output, current.Output);
- }
- }
-
- // make sure all non-pruned prefixes are present in the FST
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: verify all prefixes");
- }
- int[] stopNode = new int[1];
- foreach (KeyValuePair<Int32sRef, CountMinOutput<T>> ent in prefixes)
- {
- if (ent.Key.Length > 0)
- {
- CountMinOutput<T> cmo = ent.Value;
- T output = Run(fst, ent.Key, stopNode);
- if (LuceneTestCase.VERBOSE)
- {
- Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + Outputs.OutputToString(cmo.Output));
- }
- // if (cmo.isFinal && !cmo.isLeaf) {
- if (cmo.IsFinal)
- {
- Assert.AreEqual(cmo.FinalOutput, output);
- }
- else
- {
- Assert.AreEqual(cmo.Output, output);
- }
- Assert.AreEqual(ent.Key.Length, stopNode[0]);
- }
- }
- }
- }
-}
\ No newline at end of file