You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2016/10/23 13:02:33 UTC
[47/50] [abbrv] lucenenet git commit: Ported Analysis.Stempel + tests
(closes #190)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/DataOutputStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/DataOutputStream.cs b/src/Lucene.Net.Core/Support/DataOutputStream.cs
new file mode 100644
index 0000000..518dba7
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/DataOutputStream.cs
@@ -0,0 +1,256 @@
+\ufeffusing System;
+using System.IO;
+using System.Runtime.CompilerServices;
+
+namespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Java's DataOutputStream is similar to .NET's BinaryWriter. However, it writes
+ /// in a modified UTF-8 format that cannot be read (or duplicated) using BinaryWriter.
+ /// This is a port of DataOutputStream that is fully compatible with Java's DataInputStream.
+ /// </summary>
+ public class DataOutputStream : IDataOutput, IDisposable
+ {
+
+ /// <summary>
+ /// The number of bytes written to the data output stream so far.
+ /// If this counter overflows, it will be wrapped to <see cref="int.MaxValue"/>.
+ /// </summary>
+ protected int written;
+
+ /// <summary>
+ /// bytearr is initialized on demand by writeUTF
+ /// </summary>
+ private byte[] bytearr = null;
+
+
+ private readonly Stream @out;
+
+ /// <summary>
+ /// Creates a new data output stream that writes to the specified
+ /// underlying output stream. The constructor only stores the stream;
+ /// the counter <c>written</c> is not assigned here and therefore
+ /// keeps its default value of zero.
+ /// </summary>
+ /// <param name="out">the underlying output stream, stored and used by every write method of this class.</param>
+ public DataOutputStream(Stream @out)
+ {
+ this.@out = @out;
+ }
+
+ /// <summary>
+ /// Adds <paramref name="value"/> to the <c>written</c> counter. When the sum
+ /// comes out negative (the addition wrapped around), the counter is pinned
+ /// at <see cref="int.MaxValue"/> instead.
+ /// </summary>
+ /// <param name="value">number of bytes that have just been written.</param>
+ private void IncCount(int value)
+ {
+     int total = written + value;
+     written = total < 0 ? int.MaxValue : total;
+ }
+
+ /// <summary>
+ /// Writes the specified byte (the low eight bits of the argument
+ /// <paramref name="b"/>) to the underlying output stream. If no exception
+ /// is thrown, the counter <c>written</c> is incremented by <c>1</c>.
+ /// </summary>
+ /// <param name="b">the <c>byte</c> to be written.</param>
+ [MethodImpl(MethodImplOptions.Synchronized)]
+ public virtual void Write(int b)
+ {
+     byte low = (byte)b;
+     @out.WriteByte(low);
+     IncCount(1);
+ }
+
+ /// <summary>
+ /// Writes <paramref name="len"/> bytes from <paramref name="b"/>, starting at
+ /// index <paramref name="off"/>, to the underlying output stream, and then
+ /// adds <paramref name="len"/> to the counter <c>written</c>.
+ /// </summary>
+ /// <param name="b">the buffer holding the data.</param>
+ /// <param name="off">the index in <paramref name="b"/> of the first byte to write.</param>
+ /// <param name="len">the number of bytes to write.</param>
+ [MethodImpl(MethodImplOptions.Synchronized)]
+ public virtual void Write(byte[] b, int off, int len)
+ {
+ @out.Write(b, off, len);
+ IncCount(len);
+ }
+
+ /// <summary>
+ /// Flushes the underlying output stream. The counter <c>written</c> is not touched.
+ /// </summary>
+ public virtual void Flush()
+ {
+ @out.Flush();
+ }
+
+ /// <summary>
+ /// Writes a <c>bool</c> as a single byte: <c>1</c> for <c>true</c>,
+ /// <c>0</c> for <c>false</c>. The counter <c>written</c> grows by <c>1</c>.
+ /// </summary>
+ /// <param name="v">the value to be written.</param>
+ public void WriteBoolean(bool v)
+ {
+     byte flag = v ? (byte)1 : (byte)0;
+     @out.WriteByte(flag);
+     IncCount(1);
+ }
+
+ /// <summary>
+ /// Writes <paramref name="v"/> cast to <c>byte</c> directly to the underlying
+ /// stream and increments the counter <c>written</c> by <c>1</c>.
+ /// </summary>
+ /// <param name="v">the value whose <c>byte</c> cast is written.</param>
+ public void WriteByte(int v)
+ {
+     byte low = (byte)v;
+     @out.WriteByte(low);
+     IncCount(1);
+ }
+
+ /// <summary>
+ /// Writes the low sixteen bits of <paramref name="v"/> as two bytes, high byte
+ /// first, and increments the counter <c>written</c> by <c>2</c>.
+ /// </summary>
+ /// <param name="v">the value whose low sixteen bits are written.</param>
+ public void WriteShort(int v)
+ {
+     uint bits = (uint)v;
+     byte high = (byte)((bits >> 8) & 0xFF);
+     byte low = (byte)(bits & 0xFF);
+     @out.WriteByte(high);
+     @out.WriteByte(low);
+     IncCount(2);
+ }
+
+ /// <summary>
+ /// Writes a character code as two bytes, high byte first. Only the low sixteen
+ /// bits of <paramref name="v"/> are used. The counter <c>written</c> grows by <c>2</c>.
+ /// </summary>
+ /// <param name="v">the character code to be written.</param>
+ public void WriteChar(int v)
+ {
+     uint code = (uint)v;
+     @out.WriteByte((byte)((code >> 8) & 0xFF));
+     @out.WriteByte((byte)(code & 0xFF));
+     IncCount(2);
+ }
+
+ /// <summary>
+ /// Writes <paramref name="v"/> as four bytes, most significant byte first,
+ /// and increments the counter <c>written</c> by <c>4</c>.
+ /// </summary>
+ /// <param name="v">the value to be written.</param>
+ public void WriteInt(int v)
+ {
+     uint bits = (uint)v;
+     // Walk from the top byte (shift 24) down to the bottom byte (shift 0).
+     for (int shift = 24; shift >= 0; shift -= 8)
+     {
+         @out.WriteByte((byte)((bits >> shift) & 0xFF));
+     }
+     IncCount(4);
+ }
+
+ // Scratch buffer reused by WriteLong so that no array is allocated per call.
+ private byte[] writeBuffer = new byte[8];
+
+ /// <summary>
+ /// Writes <paramref name="v"/> as eight bytes, most significant byte first,
+ /// using a single call on the underlying stream, and increments the counter
+ /// <c>written</c> by <c>8</c>.
+ /// </summary>
+ /// <param name="v">the value to be written.</param>
+ public void WriteLong(long v)
+ {
+     ulong bits = (ulong)v;
+     for (int i = 0; i < 8; i++)
+     {
+         // Slot 0 receives bits 56..63, slot 7 receives bits 0..7.
+         writeBuffer[i] = (byte)(long)(bits >> (56 - 8 * i));
+     }
+     @out.Write(writeBuffer, 0, 8);
+     IncCount(8);
+ }
+
+ /// <summary>
+ /// Converts <paramref name="v"/> with <c>Number.FloatToIntBits</c> and writes the
+ /// resulting <c>int</c> through <c>WriteInt</c>.
+ /// </summary>
+ /// <param name="v">the value to be written.</param>
+ public void WriteFloat(float v)
+ {
+ WriteInt(Number.FloatToIntBits(v));
+ }
+
+ /// <summary>
+ /// Converts <paramref name="v"/> with <c>Number.DoubleToLongBits</c> and writes the
+ /// resulting <c>long</c> through <c>WriteLong</c>.
+ /// </summary>
+ /// <param name="v">the value to be written.</param>
+ public void WriteDouble(double v)
+ {
+ WriteLong(Number.DoubleToLongBits(v));
+ }
+
+ /// <summary>
+ /// Writes one byte per character of <paramref name="s"/>, each character being
+ /// cast to <c>byte</c>, and then adds the string length to the counter <c>written</c>.
+ /// </summary>
+ /// <param name="s">the string whose characters are written.</param>
+ public void WriteBytes(string s)
+ {
+     foreach (char ch in s)
+     {
+         @out.WriteByte((byte)ch);
+     }
+     IncCount(s.Length);
+ }
+
+ /// <summary>
+ /// Writes every character of <paramref name="s"/> as two bytes, high byte first,
+ /// and then adds twice the string length to the counter <c>written</c>.
+ /// </summary>
+ /// <param name="s">the string whose characters are written.</param>
+ public void WriteChars(string s)
+ {
+     foreach (char ch in s)
+     {
+         uint code = ch;
+         @out.WriteByte((byte)((code >> 8) & 0xFF));
+         @out.WriteByte((byte)(code & 0xFF));
+     }
+     IncCount(s.Length * 2);
+ }
+
+ /// <summary>
+ /// Writes <paramref name="str"/> by delegating to the static
+ /// <c>WriteUTF(string, IDataOutput)</c> helper with this instance as the target.
+ /// The byte count returned by the helper is discarded.
+ /// </summary>
+ /// <param name="str">the string to be written.</param>
+ public void WriteUTF(string str)
+ {
+ WriteUTF(str, this);
+ }
+
+ /// <summary>
+ /// Encodes <paramref name="str"/> as a two-byte big-endian length followed by the
+ /// encoded characters, and hands the whole buffer to <paramref name="out"/> in a
+ /// single <c>Write(byte[], int, int)</c> call.
+ /// <para>
+ /// Characters in the range 0x0001-0x007F take one byte, characters above 0x07FF
+ /// take three bytes, and everything else (including 0x0000) takes two bytes.
+ /// </para>
+ /// </summary>
+ /// <param name="str">the string to encode.</param>
+ /// <param name="out">the destination; when it is a <see cref="DataOutputStream"/>
+ /// its cached <c>bytearr</c> buffer is reused (and grown when too small).</param>
+ /// <returns>the number of bytes handed to <paramref name="out"/>: encoded length plus two.</returns>
+ /// <exception cref="FormatException">the encoded form would exceed 65535 bytes.</exception>
+ internal static int WriteUTF(string str, IDataOutput @out)
+ {
+     int charCount = str.Length;
+     int utflen = 0;
+
+     // Pass 1: measure the encoded payload.
+     for (int i = 0; i < charCount; i++)
+     {
+         int ch = str[i];
+         if (ch >= 0x0001 && ch <= 0x007F)
+         {
+             utflen += 1;
+         }
+         else if (ch > 0x07FF)
+         {
+             utflen += 3;
+         }
+         else
+         {
+             utflen += 2;
+         }
+     }
+
+     if (utflen > 65535)
+     {
+         throw new FormatException(
+             "encoded string too long: " + utflen + " bytes");
+     }
+
+     int total = utflen + 2;
+
+     // Reuse the stream's scratch buffer when the target is a DataOutputStream;
+     // otherwise allocate an exactly-sized array.
+     byte[] buffer;
+     DataOutputStream dos = @out as DataOutputStream;
+     if (dos != null)
+     {
+         if (dos.bytearr == null || dos.bytearr.Length < total)
+         {
+             dos.bytearr = new byte[(utflen * 2) + 2];
+         }
+         buffer = dos.bytearr;
+     }
+     else
+     {
+         buffer = new byte[total];
+     }
+
+     // Length prefix, high byte first.
+     int pos = 0;
+     buffer[pos++] = (byte)((utflen >> 8) & 0xFF);
+     buffer[pos++] = (byte)(utflen & 0xFF);
+
+     // Pass 2: emit the encoded characters.
+     for (int i = 0; i < charCount; i++)
+     {
+         int ch = str[i];
+         if (ch >= 0x0001 && ch <= 0x007F)
+         {
+             buffer[pos++] = (byte)ch;
+         }
+         else if (ch > 0x07FF)
+         {
+             buffer[pos++] = (byte)(0xE0 | ((ch >> 12) & 0x0F));
+             buffer[pos++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
+             buffer[pos++] = (byte)(0x80 | (ch & 0x3F));
+         }
+         else
+         {
+             buffer[pos++] = (byte)(0xC0 | ((ch >> 6) & 0x1F));
+             buffer[pos++] = (byte)(0x80 | (ch & 0x3F));
+         }
+     }
+
+     @out.Write(buffer, 0, total);
+     return total;
+ }
+
+
+ #region From FilterOutputStream
+
+ /// <summary>
+ /// Writes the whole array by delegating to <c>Write(b, 0, b.Length)</c>.
+ /// </summary>
+ /// <param name="b">the data to be written.</param>
+ public void Write(byte[] b)
+ {
+ Write(b, 0, b.Length);
+ }
+
+ /// <summary>
+ /// Disposes the underlying output stream that was passed to the constructor.
+ /// </summary>
+ public void Dispose()
+ {
+ @out.Dispose();
+ }
+
+ #endregion
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/IDataInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/IDataInput.cs b/src/Lucene.Net.Core/Support/IDataInput.cs
new file mode 100644
index 0000000..40d56cf
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/IDataInput.cs
@@ -0,0 +1,24 @@
+\ufeffnamespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Equivalent to Java's <c>java.io.DataInput</c> interface. The members below are
+ /// the PascalCase counterparts of the Java methods of the same name.
+ /// </summary>
+ public interface IDataInput
+ {
+ void ReadFully(byte[] b);
+ void ReadFully(byte[] b, int off, int len);
+ int SkipBytes(int n);
+ bool ReadBoolean();
+ byte ReadByte();
+ int ReadUnsignedByte();
+ short ReadShort();
+ int ReadUnsignedShort();
+ char ReadChar();
+ int ReadInt();
+ long ReadLong();
+ float ReadFloat();
+ double ReadDouble();
+ string ReadLine();
+ string ReadUTF();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Core/Support/IDataOutput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/IDataOutput.cs b/src/Lucene.Net.Core/Support/IDataOutput.cs
new file mode 100644
index 0000000..6f81351
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/IDataOutput.cs
@@ -0,0 +1,23 @@
+\ufeffnamespace Lucene.Net.Support
+{
+ /// <summary>
+ /// Equivalent to Java's <c>java.io.DataOutput</c> interface. The members below are
+ /// the PascalCase counterparts of the Java methods of the same name.
+ /// </summary>
+ public interface IDataOutput
+ {
+ void Write(int b);
+ void Write(byte[] b);
+ void Write(byte[] b, int off, int len);
+ void WriteBoolean(bool v);
+ void WriteByte(int v);
+ void WriteShort(int v);
+ void WriteChar(int v);
+ void WriteInt(int v);
+ void WriteLong(long v);
+ void WriteFloat(float v);
+ void WriteDouble(double v);
+ void WriteBytes(string s);
+ void WriteChars(string s);
+ void WriteUTF(string s);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs
new file mode 100644
index 0000000..1c0efc3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestCompile.cs
@@ -0,0 +1,211 @@
+\ufeffusing Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Text;
+
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This software is copyrighted by the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or document. If this license applies to the Egothor project as a
+ whole, the copyright holders are the people or entities mentioned in
+ the file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer that follows these conditions in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived from this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products derived from this software may not be called "Egothor",
+ nor may "Egothor" appear in their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided with the redistribution and/or in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many
+ individuals on behalf of the Egothor Project and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+ public class TestCompile_ : LuceneTestCase
+ {
+ private const string RULES_FILE = "Lucene.Net.Tests.Analysis.Stempel.Egothor.Stemmer.testRules.txt";
+
+ /// <summary>
+ /// Compiles the embedded rules file with the option string "test" and checks the result.
+ /// </summary>
+ [Test]
+ public void TestCompile()
+ {
+     CompileAndVerify("test");
+ }
+
+ /// <summary>
+ /// Compiles the embedded rules file with the option string "-test" and checks the result.
+ /// </summary>
+ [Test]
+ public void TestCompileBackwards()
+ {
+     CompileAndVerify("-test");
+ }
+
+ /// <summary>
+ /// Compiles the embedded rules file with the option string "Mtest" and checks the result.
+ /// </summary>
+ [Test]
+ public void TestCompileMulti()
+ {
+     CompileAndVerify("Mtest");
+ }
+
+ /// <summary>
+ /// Shared body of the three compile tests: copies the embedded rules resource into a
+ /// fresh temp directory, runs <c>Compile.Main</c> with <paramref name="options"/> as
+ /// the first argument, loads the generated ".out" file and verifies it with
+ /// <c>AssertTrie</c> in both lookup modes.
+ /// </summary>
+ /// <param name="options">the first command-line argument handed to <c>Compile.Main</c>.</param>
+ private void CompileAndVerify(string options)
+ {
+     DirectoryInfo dir = CreateTempDir("testCompile");
+     dir.Create();
+     FileInfo output = new FileInfo(Path.Combine(dir.FullName, "testRules.txt"));
+     using (Stream input = GetType().Assembly.GetManifestResourceStream(RULES_FILE))
+     {
+         Copy(input, output);
+     }
+     string path = output.FullName;
+     string compiled = path + ".out";
+     try
+     {
+         Compile.Main(new string[] { options, path });
+         Trie trie = LoadTrie(compiled);
+         AssertTrie(trie, path, true, true);
+         AssertTrie(trie, path, false, true);
+     }
+     finally
+     {
+         // Remove the compiled output even when an assertion fails;
+         // FileInfo.Delete is a no-op when the file was never created.
+         new FileInfo(compiled).Delete();
+     }
+ }
+
+ /// <summary>
+ /// Reads a compiled stemmer table from <paramref name="path"/>. The file starts with
+ /// a UTF string naming the compile method; when that string (upper-cased) contains
+ /// an 'M' the remainder is loaded as a <c>MultiTrie</c>, otherwise as a plain <c>Trie</c>.
+ /// </summary>
+ /// <param name="path">path of the compiled file.</param>
+ /// <returns>the trie deserialized from the file.</returns>
+ internal static Trie LoadTrie(string path)
+ {
+     using (DataInputStream input = new DataInputStream(
+         new FileStream(path, FileMode.Open, FileAccess.Read)))
+     {
+         string method = input.ReadUTF().ToUpperInvariant();
+         bool isMulti = method.IndexOf('M') >= 0;
+         if (isMulti)
+         {
+             return new MultiTrie(input);
+         }
+         return new Trie(input);
+     }
+ }
+
+ /// <summary>
+ /// Replays the rules file against <paramref name="trie"/>. Each line is lower-cased
+ /// and tokenized; the first token is the stem. Every following token that differs
+ /// from the stem (and the stem itself when <paramref name="storeorig"/> is set) is
+ /// looked up, the returned patch command is applied, and the result must equal the stem.
+ /// </summary>
+ /// <param name="trie">the trie under test.</param>
+ /// <param name="file">path of the rules file; it is opened read-only.</param>
+ /// <param name="usefull">when true look up with <c>GetFully</c>, otherwise with <c>GetLastOnPath</c>.</param>
+ /// <param name="storeorig">when true the stem itself is also checked.</param>
+ private static void AssertTrie(Trie trie, string file, bool usefull,
+     bool storeorig)
+ {
+     // Only reading is needed, so do not request write access to the rules file.
+     using (TextReader @in =
+         new StreamReader(new FileStream(file, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+     {
+         for (string line = @in.ReadLine(); line != null; line = @in.ReadLine())
+         {
+             try
+             {
+                 line = line.ToLowerInvariant();
+                 StringTokenizer st = new StringTokenizer(line);
+                 string stem = st.NextToken();
+                 if (storeorig)
+                 {
+                     assertEquals(stem.ToLowerInvariant(), ApplyPatch(trie, stem, usefull).ToLowerInvariant());
+                 }
+                 while (st.HasMoreTokens())
+                 {
+                     string token = st.NextToken();
+                     if (token.Equals(stem))
+                     {
+                         continue;
+                     }
+                     assertEquals(stem.ToLowerInvariant(), ApplyPatch(trie, token, usefull).ToLowerInvariant());
+                 }
+             }
+             catch (InvalidOperationException /*x*/)
+             {
+                 // no base token (stem) on a line
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Looks up <paramref name="word"/> in <paramref name="trie"/> and applies the
+ /// returned patch command to a copy of the word.
+ /// </summary>
+ private static string ApplyPatch(Trie trie, string word, bool usefull)
+ {
+     string cmd = (usefull) ? trie.GetFully(word) : trie
+         .GetLastOnPath(word);
+     StringBuilder patched = new StringBuilder(word);
+     Diff.Apply(patched, cmd);
+     return patched.ToString();
+ }
+
+ /// <summary>
+ /// Copies everything remaining in <paramref name="input"/> into the file described
+ /// by <paramref name="output"/>. The file is created, or truncated when it already
+ /// exists, so no bytes of an older, longer file survive after the copied data.
+ /// </summary>
+ /// <param name="input">the stream to read from; it is not closed here.</param>
+ /// <param name="output">the destination file.</param>
+ private static void Copy(Stream input, FileInfo output)
+ {
+     // FileMode.Create truncates an existing file; OpenOrCreate would leave stale bytes behind.
+     using (FileStream os = new FileStream(output.FullName, FileMode.Create, FileAccess.Write))
+     {
+         input.CopyTo(os);
+     }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs
new file mode 100644
index 0000000..c5bf1e9
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/TestStemmer.cs
@@ -0,0 +1,191 @@
+\ufeffusing Lucene.Net.Util;
+using NUnit.Framework;
+
+/*
+ Egothor Software License version 1.00
+ Copyright (C) 1997-2004 Leo Galambos.
+ Copyright (C) 2002-2004 "Egothor developers"
+ on behalf of the Egothor Project.
+ All rights reserved.
+
+ This software is copyrighted by the "Egothor developers". If this
+ license applies to a single file or document, the "Egothor developers"
+ are the people or entities mentioned as copyright holders in that file
+ or document. If this license applies to the Egothor project as a
+ whole, the copyright holders are the people or entities mentioned in
+ the file CREDITS. This file can be found in the same location as this
+ license in the distribution.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, the list of contributors, this list of conditions, and the
+ disclaimer that follows these conditions in the documentation
+ and/or other materials provided with the distribution.
+ 3. The name "Egothor" must not be used to endorse or promote products
+ derived from this software without prior written permission. For
+ written permission, please contact Leo.G@seznam.cz
+ 4. Products derived from this software may not be called "Egothor",
+ nor may "Egothor" appear in their name, without prior written
+ permission from Leo.G@seznam.cz.
+
+ In addition, we request that you include in the end-user documentation
+ provided with the redistribution and/or in the software itself an
+ acknowledgement equivalent to the following:
+ "This product includes software developed by the Egothor Project.
+ http://egothor.sf.net/"
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many
+ individuals on behalf of the Egothor Project and was originally
+ created by Leo Galambos (Leo.G@seznam.cz).
+ */
+
+namespace Egothor.Stemmer
+{
+ /// <summary>
+ /// Exercises <c>Trie</c>, <c>MultiTrie</c> and <c>MultiTrie2</c> (both constructor
+ /// flag values) with a small key set, checking lookups on the original trie and on
+ /// the tries produced by each reducer.
+ /// </summary>
+ public class TestStemmer : LuceneTestCase
+ {
+     // Keys inserted by every test in this fixture.
+     private static readonly string[] SampleKeys = { "a", "ba", "bb", "c" };
+
+     // One-character commands used by the Trie and MultiTrie tests.
+     private static readonly string[] SingleCharCmds = { "1", "2", "2", "4" };
+
+     /*
+      * Four-character commands used by the MultiTrie2 tests.
+      * Original note: short vals won't work, see line 155 for example
+      * (NOTE(review): the file that "line 155" refers to is not identified here);
+      * the IOOBE is caught (weird), but shouldn't affect patch cmds?
+      */
+     private static readonly string[] FourCharCmds = { "1111", "2222", "2223", "4444" };
+
+     [Test]
+     public void TestTrie()
+     {
+         Trie t = Fill(new Trie(true), SampleKeys, SingleCharCmds);
+
+         assertEquals(0, t.root);
+         assertEquals(2, t.rows.Count);
+         assertEquals(3, t.cmds.Count);
+         AssertTrieContents(t, SampleKeys, SingleCharCmds);
+     }
+
+     [Test]
+     public void TestTrieBackwards()
+     {
+         Trie t = Fill(new Trie(false), SampleKeys, SingleCharCmds);
+         AssertTrieContents(t, SampleKeys, SingleCharCmds);
+     }
+
+     [Test]
+     public void TestMultiTrie()
+     {
+         Trie t = Fill(new MultiTrie(true), SampleKeys, SingleCharCmds);
+         AssertTrieContents(t, SampleKeys, SingleCharCmds);
+     }
+
+     [Test]
+     public void TestMultiTrieBackwards()
+     {
+         Trie t = Fill(new MultiTrie(false), SampleKeys, SingleCharCmds);
+         AssertTrieContents(t, SampleKeys, SingleCharCmds);
+     }
+
+     [Test]
+     public void TestMultiTrie2()
+     {
+         Trie t = Fill(new MultiTrie2(true), SampleKeys, FourCharCmds);
+         AssertTrieContents(t, SampleKeys, FourCharCmds);
+     }
+
+     [Test]
+     public void TestMultiTrie2Backwards()
+     {
+         Trie t = Fill(new MultiTrie2(false), SampleKeys, FourCharCmds);
+         AssertTrieContents(t, SampleKeys, FourCharCmds);
+     }
+
+     /// <summary>
+     /// Adds every key/value pair to <paramref name="t"/> in array order and returns the same trie.
+     /// </summary>
+     private static Trie Fill(Trie t, string[] keys, string[] vals)
+     {
+         for (int i = 0; i < keys.Length; i++)
+         {
+             t.Add(keys[i], vals[i]);
+         }
+         return t;
+     }
+
+     /// <summary>
+     /// Checks that <c>GetFully</c> and <c>GetLastOnPath</c> return the expected value for
+     /// every key, on the given trie and on the result of reducing it with each of
+     /// <c>Optimizer</c>, <c>Optimizer2</c>, <c>Gener</c>, <c>Lift(true)</c> and <c>Lift(false)</c>.
+     /// </summary>
+     private static void AssertTrieContents(Trie trie, string[] keys, string[] vals)
+     {
+         // All reductions are computed up front, before the first assertion runs.
+         Trie[] candidates =
+         {
+             trie,
+             trie.Reduce(new Optimizer()),
+             trie.Reduce(new Optimizer2()),
+             trie.Reduce(new Gener()),
+             trie.Reduce(new Lift(true)),
+             trie.Reduce(new Lift(false))
+         };
+
+         foreach (Trie candidate in candidates)
+         {
+             for (int k = 0; k < keys.Length; k++)
+             {
+                 string key = keys[k];
+                 string expected = vals[k];
+                 assertEquals(expected, candidate.GetFully(key).ToString());
+                 assertEquals(expected, candidate.GetLastOnPath(key).ToString());
+             }
+         }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt
new file mode 100644
index 0000000..ead2823
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Egothor.Stemmer/testRules.txt
@@ -0,0 +1,4 @@
+act acted acting actor
+walk walked walking
+wander wandered wanderer
+want wanted wanting
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj b/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj
new file mode 100644
index 0000000..8be32c0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Lucene.Net.Tests.Analysis.Stempel.csproj
@@ -0,0 +1,89 @@
+\ufeff<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{940A6AB1-F00A-40E2-BC1A-2898EFA8C48F}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Tests.Analysis.Stempel</RootNamespace>
+ <AssemblyName>Lucene.Net.Tests.Analysis.Stempel</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="nunit.framework, Version=2.6.3.13283, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL">
+ <HintPath>..\..\packages\NUnit.2.6.3\lib\nunit.framework.dll</HintPath>
+ <Private>True</Private>
+ </Reference>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Egothor.Stemmer\TestCompile.cs" />
+ <Compile Include="Egothor.Stemmer\TestStemmer.cs" />
+ <Compile Include="Pl\TestPolishAnalyzer.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Stempel\TestStempelPolishStemFilterFactory.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="packages.config" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+ <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+ <Name>Lucene.Net.Analysis.Common</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Stempel\Lucene.Net.Analysis.Stempel.csproj">
+ <Project>{a76dad88-e3a5-40f9-9114-facd77bd8265}</Project>
+ <Name>Lucene.Net.Analysis.Stempel</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+ <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+ <Name>Lucene.Net.TestFramework</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <EmbeddedResource Include="Egothor.Stemmer\testRules.txt" />
+ </ItemGroup>
+ <ItemGroup />
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs
new file mode 100644
index 0000000..0bd90bf
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Pl/TestPolishAnalyzer.cs
@@ -0,0 +1,102 @@
+\ufeffusing Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Pl
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests for <c>PolishAnalyzer</c>: resource loading, stemming, stopwords,
+ /// the stem-exclusion set and random input.
+ /// </summary>
+ public class TestPolishAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Constructing the analyzer is the whole test. Per the upstream note it fails
+     /// with a null-reference error when the stopwords file cannot be found.
+     /// </summary>
+     [Test]
+     public void TestResourcesAvailable()
+     {
+         new PolishAnalyzer(TEST_VERSION_CURRENT);
+     }
+
+     /// <summary>
+     /// Checks stemming of two inflected forms and removal of a stopword.
+     /// </summary>
+     [Test]
+     public void TestBasics()
+     {
+         Analyzer analyzer = new PolishAnalyzer(TEST_VERSION_CURRENT);
+
+         // stemming: both forms are expected to reduce to "student"
+         CheckOneTerm(analyzer, "studenta", "student");
+         CheckOneTerm(analyzer, "studenci", "student");
+
+         // stopword: no tokens are expected
+         string[] noTokens = new string[0];
+         AssertAnalyzesTo(analyzer, "by\u0142", noTokens);
+     }
+
+     /// <summary>
+     /// Checks that a term placed in the exclusion set is passed through unstemmed
+     /// while other terms are still stemmed.
+     /// </summary>
+     [Test]
+     public void TestExclude()
+     {
+         CharArraySet excluded = new CharArraySet(TEST_VERSION_CURRENT, AsSet("studenta"), false);
+         Analyzer analyzer = new PolishAnalyzer(TEST_VERSION_CURRENT,
+             PolishAnalyzer.GetDefaultStopSet(), excluded);
+
+         CheckOneTerm(analyzer, "studenta", "studenta");
+         CheckOneTerm(analyzer, "studenci", "student");
+     }
+
+     /// <summary>
+     /// Feeds random strings through the analyzer.
+     /// </summary>
+     [Test]
+     public void TestRandomStrings()
+     {
+         CheckRandomData(
+             Random(),
+             new PolishAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// LUCENENET specific. The Java implementation relied on String.subSequence(int, int)
+     /// throwing an IndexOutOfBoundsException (the .NET counterpart would be
+     /// string.Substring(int, int) throwing ArgumentOutOfRangeException). The .NET port
+     /// instead tests for a negative argument and avoids throwing, because exceptions are
+     /// expensive and not meant for "normal" control flow in .NET. This case was written
+     /// while tracking that down: an IndexOutOfRangeException, rather than
+     /// ArgumentOutOfRangeException, had been placed in the catch block, which made
+     /// TestRandomStrings fail. The input below makes the second substring argument
+     /// negative, although that no longer raises an exception.
+     /// </summary>
+     [Test]
+     public void TestOutOfRange()
+     {
+         var analyzer = new PolishAnalyzer(TEST_VERSION_CURRENT);
+         var input = new StringReader("zyaolz 96619727 p");
+         int remainder = 2;
+
+         using (var ts = analyzer.TokenStream("dummy", (TextReader)new MockCharFilter(input, remainder)))
+         {
+             ts.Reset();
+
+             // Drain the stream; only the absence of an exception matters.
+             while (ts.IncrementToken())
+             {
+             }
+
+             ts.End();
+         }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..5332d92
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+\ufeffusing System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+ // General Information about an assembly is controlled through the following
+ // set of attributes. Change these attribute values to modify the information
+ // associated with an assembly.
+ [assembly: AssemblyTitle("Lucene.Net.Tests.Analysis.Stempel")]
+ [assembly: AssemblyDescription("")]
+ [assembly: AssemblyConfiguration("")]
+ [assembly: AssemblyCompany("")]
+ [assembly: AssemblyProduct("Lucene.Net.Tests.Analysis.Stempel")]
+ [assembly: AssemblyCopyright("Copyright © 2016")]
+ [assembly: AssemblyTrademark("")]
+ [assembly: AssemblyCulture("")]
+
+ // Setting ComVisible to false makes the types in this assembly not visible
+ // to COM components. If you need to access a type in this assembly from
+ // COM, set the ComVisible attribute to true on that type.
+ [assembly: ComVisible(false)]
+
+ // The following GUID is for the ID of the typelib if this project is exposed to COM
+ [assembly: Guid("940a6ab1-f00a-40e2-bc1a-2898efa8c48f")]
+
+ // Version information for an assembly consists of the following four values:
+ //
+ // Major Version
+ // Minor Version
+ // Build Number
+ // Revision
+ //
+ // You can specify all the values or you can default the Build and Revision Numbers
+ // by using the '*' as shown below:
+ // [assembly: AssemblyVersion("1.0.*")]
+ [assembly: AssemblyVersion("1.0.0.0")]
+ [assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs
new file mode 100644
index 0000000..b6be4af
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/Stempel/TestStempelPolishStemFilterFactory.cs
@@ -0,0 +1,56 @@
+\ufeffusing NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Stempel
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests for <see cref="StempelPolishStemFilterFactory"/>
+ /// </summary>
+ public class TestStempelPolishStemFilterFactory : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// A factory built with no arguments must produce a filter that stems both
+     /// whitespace-separated input words to "student".
+     /// </summary>
+     [Test]
+     public void TestBasics()
+     {
+         TextReader input = new StringReader("studenta studenci");
+         StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(new Dictionary<string, string>());
+         TokenStream tokens = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
+         TokenStream stemmed = factory.Create(tokens);
+
+         string[] expected = { "student", "student" };
+         AssertTokenStreamContents(stemmed, expected);
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in exception
+     /// </summary>
+     [Test]
+     public void TestBogusArguments()
+     {
+         try
+         {
+             Dictionary<string, string> args = new Dictionary<string, string>();
+             args.Add("bogusArg", "bogusValue");
+             new StempelPolishStemFilterFactory(args);
+             fail();
+         }
+         catch (ArgumentException e)
+         {
+             assertTrue(e.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/29525086/src/Lucene.Net.Tests.Analysis.Stempel/packages.config
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/packages.config b/src/Lucene.Net.Tests.Analysis.Stempel/packages.config
new file mode 100644
index 0000000..139d513
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/packages.config
@@ -0,0 +1,4 @@
+\ufeff<?xml version="1.0" encoding="utf-8"?>
+<packages>
+ <package id="NUnit" version="2.6.3" targetFramework="net451" />
+</packages>
\ No newline at end of file