You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2009/11/03 19:06:38 UTC
svn commit: r832486 [3/29] - in /incubator/lucene.net/trunk/C#/src: ./
Demo/DeleteFiles/ Demo/DemoLib/ Demo/IndexFiles/ Demo/IndexHtml/
Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Document/
Lucene.Net/Index/ Lucene.Net/Search/ Lucene....
Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestASCIIFoldingFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestASCIIFoldingFilter.cs?rev=832486&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestASCIIFoldingFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestASCIIFoldingFilter.cs Tue Nov 3 18:06:27 2009
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NUnit.Framework;
+
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
+
+namespace Lucene.Net.Analysis
+{
+
+ [TestFixture]
+ public class TestASCIIFoldingFilter:BaseTokenStreamTestCase
+ {
+
+ // testLatin1Accents() is a copy of TestLatin1AccentFilter.testU().
+ [Test]
+ public virtual void testLatin1Accents()
+ {
+ TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ĳ Ð Ñ" + " Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ĳ" + " ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ ﬁ ﬂ"));
+ ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
+
+ TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute));
+
+ AssertTermEquals("Des", filter, termAtt);
+ AssertTermEquals("mot", filter, termAtt);
+ AssertTermEquals("cles", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("LA", filter, termAtt);
+ AssertTermEquals("CHAINE", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("AE", filter, termAtt);
+ AssertTermEquals("C", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("IJ", filter, termAtt);
+ AssertTermEquals("D", filter, termAtt);
+ AssertTermEquals("N", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("OE", filter, termAtt);
+ AssertTermEquals("TH", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("Y", filter, termAtt);
+ AssertTermEquals("Y", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("ae", filter, termAtt);
+ AssertTermEquals("c", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("ij", filter, termAtt);
+ AssertTermEquals("d", filter, termAtt);
+ AssertTermEquals("n", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("oe", filter, termAtt);
+ AssertTermEquals("ss", filter, termAtt);
+ AssertTermEquals("th", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("y", filter, termAtt);
+ AssertTermEquals("y", filter, termAtt);
+ AssertTermEquals("fi", filter, termAtt);
+ AssertTermEquals("fl", filter, termAtt);
+ Assert.IsFalse(filter.IncrementToken());
+ }
+
+
+ // The following Perl script generated the foldings[] array automatically
+ // from ASCIIFoldingFilter.java:
+ //
+ // ============== begin get.test.cases.pl ==============
+ //
+ // use strict;
+ // use warnings;
+ //
+ // my $file = "ASCIIFoldingFilter.java";
+ // my $output = "testcases.txt";
+ // my %codes = ();
+ // my $folded = '';
+ //
+ // open IN, "<:utf8", $file || die "Error opening input file '$file': $!";
+ // open OUT, ">:utf8", $output || die "Error opening output file '$output': $!";
+ //
+ // while (my $line = <IN>) {
+ // chomp($line);
+ // # case '\u0133': // <char> <maybe URL> [ description ]
+ // if ($line =~ /case\s+'\\u(....)':.*\[([^\]]+)\]/) {
+ // my $code = $1;
+ // my $desc = $2;
+ // $codes{$code} = $desc;
+ // }
+ // # output[outputPos++] = 'A';
+ // elsif ($line =~ /output\[outputPos\+\+\] = '(.+)';/) {
+ // my $output_char = $1;
+ // $folded .= $output_char;
+ // }
+ // elsif ($line =~ /break;/ && length($folded) > 0) {
+ // my $first = 1;
+ // for my $code (sort { hex($a) <=> hex($b) } keys %codes) {
+ // my $desc = $codes{$code};
+ // print OUT ' ';
+ // print OUT '+ ' if (not $first);
+ // $first = 0;
+ // print OUT '"', chr(hex($code)), qq!" // U+$code: $desc\n!;
+ // }
+ // print OUT qq! ,"$folded", // Folded result\n\n!;
+ // %codes = ();
+ // $folded = '';
+ // }
+ // }
+ // close OUT;
+ //
+ // ============== end get.test.cases.pl ==============
+ //
+ [Test]
+ public virtual void testAllFoldings()
+ {
+ // Alternating strings of:
+ // 1. All non-ASCII characters to be folded, concatenated together as a
+ // single string.
+ // 2. The string of ASCII characters to which each of the above
+ // characters should be folded.
+ System.String[] foldings = new System.String[]{"Ãâ¬" + "�?" + "Ãâ" + "ÃÆ" + "Ãâ" + "Ãâ¦" + "Ãâ¬" + "Ãâ" + "Ãâ" + "�?" + "�?" + "Þ" + "à" + "ú" + "Ãâ¬" + "Ãâ" + "æ" + "ú" + "á´â¬" + "á¸â¬" + "Ạ" + "Ả" + "Ấ" + "Ầ" + "Ẩ" + "Ẫ" + "Ậ" + "Ắ" + "Ằ" + "Ẳ" + "Ẵ" + "Ặ" + "ââ¶" + "A", "A", "à" + "á" + "â" + "ã" + "ä" + "ÃÂ¥" + "�?" + "ÃÆ" + "Ãâ¦" + "ÃŽ" + "ß" + "á" + "û" + "�?" + "ÃÆ" + "ç" + "�?" + "Ãâ¢" + "ÃÅ¡" + "�?" + "�?" + "á¶â¢" + "ẚ" + "ạ" + "ả" + "ấ" + "
ầ" + "ẩ" + "ẫ" + "áºÂ" + "ắ" + "ằ" + "ẳ" + "ẵ" + "ặ" + "�?" + "âââ" + "�?" + "â±¥" + "Ɐ" + "�?", "a", "êŲ", "AA", "Ãâ " + "â" + "ü" + "�?", "AE", "êÅ´", "AO", "êŶ", "AU", "êŸ" + "êź", "AV", "êż", "AY", "ââÅ", "(a)", "êų", "aa", "æ" + "ã" + "ý" + "á´â", "ae", "êŵ", "ao", "êÅ·", "au", "êŹ" + "êÅ»", "av", "êŽ", "ay", "�?" + "Ãâ" + "ÃÆ" + "Ãâ¢" + "á´Æ" + "á¸â" + "á¸â" + "á¸â " + "ââ·" + "ï¼¢", "B", "Ãâ¬" + "ÃÆ" + "Ãâ" + "ᵬ" + "á¶â¬" + "á¸Æ" + "á¸â¦" + "á¸â¡" + "ââ
â" + "ï½â", "b", "�?", "(b)", "Ãâ¡" + "Ãâ " + "ÃË" + "ÃÅ " + "ÃÅ" + "Ãâ¡" + "û" + "Ãâ" + "á´â" + "á¸Ë" + "ââ¸" + "ï¼£", "C", "ç" + "Ãâ¡" + "Ãâ°" + "Ãâ¹" + "�?" + "ÃË" + "ü" + "Ãâ¢" + "á¸â°" + "ââ â" + "âââ" + "êž" + "êÅ¿" + "ï½Æ", "c", "ââž", "(c)", "�?" + "ÃŽ" + "�?" + "Ãâ°" + "ÃÅ " + "Ãâ¹" + "á´â¦" + "á´â " + "Ḋ" + "á¸Å" + "Ḏ" + "�?" + "á¸â" + "ââ¹" + "�?�" + "D", "D", "ð" + "�?" + "Ãâ" + "ÃÅ" + "á" + "Ãâ" + "Ãâ" + "áµÂ" + "�?" + "á¶â" + "á¸â¹" + "�?" + "Ã
¯Â¿Â½?" + "á¸â" + "á¸â" + "âââ" + "�?�" + "ï½â", "d", "Ãâ" + "ñ", "DZ", "Ãâ¦" + "ò", "Dz", "ââŸ", "(d)", "ø", "db", "Ãâ " + "ó" + "ã" + "ÃÂ¥", "dz", "ÃË" + "Ãâ°" + "ÃÅ " + "Ãâ¹" + "Ãâ" + "Ãâ" + "Ãâ" + "ÃË" + "ÃÅ¡" + "ÃŽ" + "�?" + "Ãâ" + "Ãâ " + "è" + "Ãâ " + "á´â¡" + "á¸â" + "á¸â" + "á¸Ë" + "Ḛ" + "á¸Å" + "Ẹ" + "Ẻ" + "Ẽ" + "Ế" + "á»â¬" + "á»â" + "á»â" + "á»â " + "ââº" + "â±»" + "ï¼¥", "E", "è" + "é" + "ê" + "ë" + "Ãâ" + "Ãâ¢" + "Ãâ" + "Ãâ¢" + "Ãâº" + "�?" + "Ãâ¦" + "Ãâ
¡" + "é" + "Ãâ¡" + "ÃË" + "Ãâº" + "ÃÅ" + "�?" + "Þ" + "ÃÅ¡" +
+ "á´Ë" + "á¶â" + "á¶â" + "á¶â" + "á¸â¢" + "á¸â" + "á¸â¢" + "á¸âº" + "�?" + "ẹ" + "ẻ" + "ẽ" + "ế" + "�?" + "á»Æ" + "á»â¦" + "á»â¡" + "âââ" + "âââ" + "ⱸ" + "ï½â¦", "e", "ââ ", "(e)", "Ãâ" + "Ḟ" + "ââ»" + "êÅ°" + "�?�" + "ꟻ" + "F", "F", "Ãâ" + "áµ®" + "á¶â" + "ḟ" + "áºâº" + "âââ¢" + "�?�" + "ï½â ", "f", "ââ¡", "(f)", "ï¬â¬", "ff", "ï¬Æ", "ffi", "ï¬â", "ffl", "�?", "fi", "ï¬â", "fl", "ÃÅ" + "Þ" + "à" + "â" + "Ãâ" + "ä" + "ÃÂ¥" + "æ" + "ç" + "ô
" + "â" + "Ãâº" + "Ḡ" + "ââ¼" + "�?�" + "�?�" + "G", "G", "�?" + "ß" + "á" + "ã" + "õ" + "à" + "á" + "áµ·" + "áµ¹" + "á¶Æ" + "ḡ" + "âââ" + "�?�" + "ï½â¡", "g", "ââ¢", "(g)", "ä" + "æ" + "Þ" + "ÃÅ" + "Ḣ" + "Ḥ" + "Ḧ" + "Ḩ" + "Ḫ" + "ââ½" + "Ⱨ" + "â±µ" + "H", "H", "ÃÂ¥" + "ç" + "ß" + "ÃÂ¥" + "æ" + "î" + "ï" + "ḣ" + "ḥ" + "ḧ" + "ḩ" + "ḫ" + "áºâ" + "âââ" + "ⱨ" + "ⱶ" + "ï½Ë", "h", "ö", "HV", "ââ£", "(h)", "Ãâ¢", "hv", "ÃÅ" + "�?" + "
ÃŽ" + "�?" + "è" + "ê" + "ì" + "î" + "ð" + "Ãâ" + "Ãâ" + "�?" + "ÃË" + "ÃÅ " + "ê" + "áµ»" + "Ḭ" + "Ḯ" + "á»Ë" + "Ị" + "ââ¾" + "ꟾ" + "I", "I", "ì" + "ÃÂ" + "î" + "ï" + "é" + "ë" + "ÃÂ" + "ï" + "ñ" + "�?" + "Ãâ°" + "Ãâ¹" + "è" + "á´â°" + "áµ¢" + "áµ¼" + "á¶â" + "á¸Â" + "ḯ" + "á»â°" + "á»â¹" + "�?�" + "ââË" + "ï½â°", "i", "ò", "IJ", "ââ¤", "(i)", "ó", "ij", "ô" + "ÃË" + "á´Š" + "ââ¿" + "J", "J", "õ" + "ð" + "÷" + "Ãâ°" + "ß" + "Ãâ" + "�?" + "âââ¢" + "â�
�±Â¼" + "j", "j", "ââÂ¥", "(j)", "ö" + "ÃË" + "è" + "á´â¹" + "Ḱ" + "Ḳ" + "Ḵ" + "âââ¬" + "Ⱪ" + "�?�" + "�?�" + "�?�" + "K", "K", "÷" + "Ãâ¢" + "é" + "Þ" + "á¶â" + "ḱ" + "ḳ" + "ḵ" + "ââÅ¡" + "ⱪ" + "�??" + "�?�" + "�?�" + "ï½â¹", "k", "ââ¦", "(k)", "ù" + "û" + "ý" + "ÿ" + "�?" + "ý" + "ß" + "á´Å" + "Ḷ" + "Ḹ" + "Ḻ" + "Ḽ" + "�?" + "â± " + "â±¢" + "�?�" + "�?�" + "êžâ¬" + "L", "L", "ú" + "ü" + "þ" + "Ã
â¬" +
"Ã
â" + "ÃÅ¡" + "ô" + "ë" + "ì" + "ÃÂ" + "á¶â¦" + "ḷ" + "ḹ" + "ḻ" + "ḽ" + "âââº" + "ⱡ" +
+ "�?�" + "�?�" + "�?" + "ï½Å", "l", "Ãâ¡", "LJ", "Ỻ", "LL", "ÃË", "Lj", "ââ§", "(l)", "Ãâ°", "lj", "á»»", "ll", "ê", "ls", "ë", "lz", "ÃÅ" + "�?" + "Ḿ" + "á¹â¬" + "á¹â" + "âââ" + "â±®" + "ꟽ" + "ꟿ" + "ï¼Â", "M", "ï" + "ð" + "ñ" + "ᵯ" + "á¶â " + "ḿ" + "�?" + "á¹Æ" + "ââÅ" + "�?", "m", "ââ¨", "(m)", "Ãâ" + "Ã
Æ" + "Ã
â¦" + "Ã
â¡" + "Ã
Å " + "�?" + "ø" + "à" + "ô" + "á´Ž" + "á¹â" + "á¹â " + "á¹Ë" + "Ṋ" + "ââÆ" + "ï¼®", "N", "ñ" + "Ã
â" + "Ã
â " + "Ã
Ë" + "Ã
â°" + "Ã
â¹" + "ÃÅ�
�" + "ù" + "õ" + "ò" + "ó" + "áµ°" + "á¶â¡" + "á¹â¦" + "á¹â¡" + "á¹â°" + "á¹â¹" + "�?�" + "�?" + "n", "n", "ÃÅ ", "NJ", "Ãâ¹", "Nj", "ââ©", "(n)", "ÃÅ", "nj", "Ãâ" + "Ãâ" + "Ãâ" + "Ãâ¢" + "Ãâ" + "ÃË" + "Ã
Å" + "Ã
Ž" + "�?" + "Ãâ " + "ß" + "à" + "Ãâ" + "ê" + "ì" + "þ" + "ÃÅ" + "ÃŽ" + "ê" + "ì" + "î" + "ð" + "�?" + "�?" + "á¹Å" + "Ṏ" + "�?" + "á¹â" + "á»Å" + "Ỏ" + "�?" + "á»â" + "á»â" + "á»â" + "á»Ë" + "Ớ" + "á»Å" + "Ở" + "á» " + "Ợ" + "âââ" + "�?�
" + "�?�" + "O", "O", "ò" + "ó" + "ô" + "õ" + "ö" + "ø" + "�?" + "�?" + "Ã
â" + "á" + "Ãâ" + "ë" + "ÃÂ" + "ÿ" + "�?" + "�?" + "ë" + "ÃÂ" + "ï" + "ñ" + "Ãâ" + "õ" + "á´â" + "á´â" + "á¶â" + "�?" + "�?" + "á¹â" + "á¹â" + "�?" + "�?" + "á»â" + "á»â" + "á»â¢" + "á»â" + "á»â¢" + "á»âº" + "�?" + "ở" + "ỡ" + "ợ" + "âââ" + "ââž" + "ⱺ" + "�?�" + "�??" + "�?", "o", "Ã
â" + "ö", "OE", "�?�", "OO", "â" + "á´â¢", "OU", "ââª", "(o)", "Ã
â" +
"á´â", "oe", "�??", "oo", "ã", "ou", "ä" + "á´Ë" + "á¹â" + "á¹â" + "âââ¦" + "â±£" + "�??" + "�?�" + "�?�" + "ï¼°", "P", "ÃÂ¥" + "áµ±" + "áµ½" + "á¶Ë" + "á¹â¢" + "á¹â" + "ââŸ" + "�?�" + "�?�" + "�?�" + "ꟼ" + "�?", "p", "ââ«", "(p)", "ÃÅ " + "âââ " + "�?�" + "�?�" + "ï¼±", "Q", "ø" + "Ãâ¹" + "à" + "ââ " + "�?�" + "�?�" + "ï½â", "q", "ââ¬", "(q)", "ù", "qp", "Ã
â" + "Ã
â" + "Ã
Ë" + "�?" + "Ãâ" + "ÃÅ" + "Ãâ¬" + "�?" + "á´â¢" +
"á´š" + "á¹Ë" + "Ṛ" + "á¹Å" + "Ṟ" + "âââ¡" + "Ɽ" + "�?�" + "êžâ" + "ï¼²", "R", "Ã
â¢" +
+ "Ã
â" + "Ã
â¢" + "Ãâ" + "Ãâ" + "�?" + "ü" + "ý" + "þ" + "ÿ" + "áµ£" + "áµ²" + "áµ³" + "á¶â°" + "á¹â¢" + "á¹âº" + "�?" + "ṟ" + "ââ¡" + "�?�" + "êžÆ" + "ï½â", "r", "ââÂ", "(r)", "Ã
Å¡" + "Ã
Å" + "Ã
ž" + "Ã
 " + "ÃË" + "á¹ " + "á¹¢" + "Ṥ" + "Ṧ" + "Ṩ" + "ââË" + "êű" + "êžâ¦" + "ï¼³", "S", "Ã
âº" + "�?" + "Ã
Ÿ" + "Ã
¡" + "Ã
¿" + "Ãâ¢" + "ÿ" + "Ãâ" + "áµ´" + "ᶊ" + "ṡ" + "á¹£" + "á¹¥" + "ṧ" + "ṩ" + "áºÅ" + "�?" + "ââ¢" + "êžâ" + "ï½â", "s", "ẞ", "SS", "ââ®", "(
s)", "ß", "ss", "ï¬â ", "st", "Ã
¢" + "Ã
¤" + "Ã
¦" + "ì" + "î" + "ÃÅ¡" + "þ" + "á´âº" + "Ṫ" + "Ṭ" + "á¹®" + "á¹°" + "âââ°" + "êžâ " + "ï¼´", "T", "Ã
£" + "Ã
Â¥" + "Ã
§" + "ë" + "ÃÂ" + "Ãâº" + "ö" + "Ãâ¡" + "ÃË" + "áµµ" + "ṫ" + "á¹Â" + "ṯ" + "á¹±" + "áºâ" + "ââ£" + "ⱦ" + "ï½â", "t", "Þ" + "�?�", "TH", "êŨ", "TZ", "ââ¯", "(t)", "è", "tc", "þ" + "ᵺ" + "�?�", "th", "æ", "ts", "êÅ©", "tz", "Ãâ¢" + "ÃÅ¡" + "Ãâº" + "ÃÅ" + "Ã
¨" + "Ã
ª" + "Ã
¬" + "Ã
®" + "Ã
°" + "Ã
²" + "ï" + "Ãâ" + "Ãâ¢" + "Ãâ" + "Ãâ
¢" + "Ãâº" + "Ãâ" + "Ãâ" + "Ãâ" + "á´Å" + "áµ¾" + "á¹²" + "á¹´" + "Ṷ" + "Ṹ" + "Ṻ" + "Ụ" + "Ủ" + "Ứ" + "Ừ" + "Ử" + "á»®" + "á»°" + "ââÅ " + "ï¼µ", "U", "ù" + "ú" + "û" + "ü" + "Ã
©" + "Ã
«" + "Ã
Â" + "Ã
¯" + "Ã
±" + "Ã
³" + "ð" + "Ãâ" + "Ãâ" + "ÃË" + "ÃÅ¡" + "ÃÅ" + "Ãâ¢" + "Ãâ" + "Ãâ°" + "ᵤ" + "á¶â¢" + "á¹³" + "á¹µ" + "á¹·" + "á¹¹" + "á¹»" + "ụ" + "ủ" + "ứ" + "ừ" + "á»Â" + "ữ" + "á»±" + "ââ¤" + "ï½â¢", "u", "ââ°", "(u)", "ᵫ", "ue", "ò" + "Ãâ¦" + "á´ " + "á¹¼"
+ "á¹¾" + "Ỽ" + "âââ¹" + "�?�" + "�?�" + "V", "V", "Ãâ¹" + "ÃÅ" + "áµ¥" + "á¶Å" + "á¹½" + "ṿ" + "ââÂ¥" + "â±±" + "â±´" + "�?�" + "ï½â", "v", "�?�", "VY", "ââ±", "(v)", "�?�", "vy", "Ã
´" + "÷" + "á´¡" + "áºâ¬" + "áºâ" + "áºâ" + "áºâ " + "áºË" + "ââÅ" + "â±²" + "ï¼·", "W", "Ã
µ" + "ÿ" + "�?" + "�?" + "áºÆ" + "áºâ¦" + "áºâ¡" + "áºâ°" + "áºË" + "ââ¦" + "â±³" + "ï½â", "w", "ââ²", "(w)", "Ẋ" + "áºÅ" + "�?" + "X", "X", "�?" + "áºâ¹" + "�
?" + "âââ" + "ââ§" + "ï½Ë", "x", "ââ³", "(x)", "�?" + "Ã
¶" + "Ã
¸" + "ó" + "ò" + "ÃŽ" +
+ "�?" + "Ẏ" + "Ỳ" + "á»´" + "Ỷ" + "Ỹ" + "Ỿ" + "ââŽ" + "ï¼¹", "Y", "ý" + "ÿ" + "Ã
·" + "ô" + "ó" + "�?" + "ÃŽ" + "�?" + "áºâ¢" + "ỳ" + "ỵ" + "á»·" + "ỹ" + "ỿ" + "ââ¨" + "ï½â¢", "y", "ââ´", "(y)", "Ã
¹" + "Ã
»" + "Ã
½" + "õ" + "ÃÅ" + "ä" + "á´¢" + "�?" + "áºâ" + "áºâ" + "�?" + "Ⱬ" + "�?�" + "Z", "Z", "Ã
º" + "Ã
¼" + "Ã
¾" + "ö" + "�?" + "ÃÂ¥" + "Ãâ¬" + "�?" + "Ãâ" + "ᵶ" + "ᶎ" + "áºâ" + "áºâ" + "áºâ¢" + "ââ©" + "ⱬ" + "�?�" + "z",
"z", "ââµ", "(z)", "�?�" + "âââ¬" + "ââª" + "ââ¿" + "�?", "0", "ù" + "�?" + "ââ " + "ââµ" + "�?�" + "âžâ¬" + "➊" + "ï¼â", "1", "ââË", "1.", "ââ´", "(1)", "ò" + "âââ" + "ââ¡" + "ââ¶" + "�?�" + "�?" + "âžâ¹" + "ï¼â", "2", "âââ°", "2.", "ââµ", "(2)", "ó" + "ââÆ" + "ââ¢" + "ââ·" + "�?�" + "âžâ" + "âžÅ" + "ï¼â", "3", "ââÅ ", "3.", "ââ¶", "(3)", "�?�" + "âââ" + "ââ£" + "ââ¸" + "�?�" + "âžÆ" + "�?" + "ï¼â", "4", "âââ¹", "4.", "ââ·"
, "(4)", "�?�" + "âââ¦" + "ââ¤" + "ââ¹" + "�?�" + "âžâ" + "➎" + "ï¼â¢", "5", "ââÅ", "5.", "ââ¸", "(5)", "�?�" + "âââ " + "ââÂ¥" + "ââº" + "�?�" + "âžâ¦" + "�?" + "ï¼â", "6", "�?", "6.", "ââ¹", "(6)", "�?�" + "âââ¡" + "ââ¦" + "ââ»" + "�?�" + "âžâ " + "�?" + "ï¼â", "7", "ââŽ", "7.", "ââº", "(7)", "�?�" + "ââË" + "ââ§" + "ââ¼" + "�?�" + "âžâ¡" + "âžâ" + "ï¼Ë", "8", "�?", "8.", "ââ»", "(8)", "�?�" + "âââ°" + "ââ¨" + "Ã�
�â½" + "�?�" + "âžË" + "âžâ" + "ï¼â¢", "9", "�?", "9.", "ââ¼", "(9)", "ââ©" + "ââ¾" + "�?�" + "âžâ°" + "âžâ", "10", "âââ", "10.", "ââ½", "(10)", "ââª" + "ââ«", "11", "âââ", "11.", "ââ¾", "(11)", "ââ«" + "ââ¬", "12", "âââ", "12.", "ââ¿", "(12)", "ââ¬" + "ââÂ", "13", "âââ", "13.", "âââ¬", "(13)", "ââÂ" + "ââ®", "14", "âââ¢", "14.", "�?", "(14)", "ââ®" + "ââ¯", "15", "âââ", "15.", "âââ", "(15)", "ââ¯" + "ââ°", "16", "âââ", "16.", "ââÆ", "(16)", "ââ°" + "ââ±", "17", "ââË", "17.", "âââ", "(17)"
, "ââ±" + "ââ²", "18", "âââ¢", "18.", "âââ¦", "(18)", "ââ²" + "ââ³", "19", "ââÅ¡", "19.", "âââ ", "(19)", "ââ³" + "ââ´", "20", "âââº",
+ "20.", "âââ¡", "(20)", "ë" + "û" + "ââ¬Å" + "�?" + "ââ¬Å¾" + "ââ¬Â³" + "ââ¬Â¶" + "�??" + "�?�" + "�?�" + "�?�" + "ï¼â", "\"", "ââ¬Ë" + "ââ¬â¢" + "ââ¬Å¡" + "ââ¬âº" + "ââ¬Â²" + "ââ¬Âµ" + "ââ¬Â¹" + "ââ¬Âº" + "�?�" + "�?�" + "ï¼â¡", "'", "�?" + "ââ¬â" + "ââ¬â" + "ââ¬â" + "ââ¬â" + "�?�" + "âââ¹" + "�?", "-", "�?�" + "�?�" + "ï¼»", "[", "�?�" + "�?�" + "ï¼½", "]", "�?�" + "�?" + "�?�" + "�?�" + "ï¼Ë
", "(", "⸨", "((", "�?�" + "ââŽ" + "�?�" + "�?�" + "ï¼â°", ")", "⸩", "))", "�?�" + "�?�" + "ï¼Å", "<", "�?�" + "�?�" + ">", ">", "�?�" + "ï½âº", "{", "�?�" + "�?", "}", "�?�" + "ââÅ " + "ï¼â¹", "+", "�?�" + "ââÅ" + "�?", "=", "�?", "!", "ââ¬Â¼", "!!", "�?�", "!?", "ï¼Æ", "#", "ï¼â", "$", "�?�" + "ï¼â¦", "%", "ï¼â ", "&", "�?�" + "*", "*", "ï¼Å", ",", ".", ".", "�?�" + "�?", "/", ":", ":", "�??" + "ï¼
âº", ";", "?", "?", "�?�", "??", "�?�", "?!", "ï¼ ", "@", "ï¼¼", "\\", "ââ¬Â¸" + "ï¼¾", "^", "_", "_", "�?�" + "~", "~"};
+
+ // Construct input text and expected output tokens
+ System.Collections.IList expectedOutputTokens = new System.Collections.ArrayList();
+ System.Text.StringBuilder inputText = new System.Text.StringBuilder();
+ for (int n = 0; n < foldings.Length; n += 2)
+ {
+ if (n > 0)
+ {
+ inputText.Append(' '); // Space between tokens
+ }
+ inputText.Append(foldings[n]);
+
+ // Construct the expected output token: the ASCII string to fold to,
+ // duplicated as many times as the number of characters in the input text.
+ System.Text.StringBuilder expected = new System.Text.StringBuilder();
+ int numChars = foldings[n].Length;
+ for (int m = 0; m < numChars; ++m)
+ {
+ expected.Append(foldings[n + 1]);
+ }
+ expectedOutputTokens.Add(expected.ToString());
+ }
+
+ TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(inputText.ToString()));
+ ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
+ TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute));
+ System.Collections.IEnumerator expectedIter = expectedOutputTokens.GetEnumerator();
+ while (expectedIter.MoveNext())
+ {
+ ;
+ AssertTermEquals((System.String) expectedIter.Current, filter, termAtt);
+ }
+ Assert.IsFalse(filter.IncrementToken());
+ }
+
+ internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
+ {
+ Assert.IsTrue(stream.IncrementToken());
+ Assert.AreEqual(expected, termAtt.Term());
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestAnalyzers.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestAnalyzers.cs?rev=832486&r1=832485&r2=832486&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestAnalyzers.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestAnalyzers.cs Tue Nov 3 18:06:27 2009
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,32 +19,24 @@
using NUnit.Framework;
+using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using StandardTokenizer = Lucene.Net.Analysis.Standard.StandardTokenizer;
+using PayloadAttribute = Lucene.Net.Analysis.Tokenattributes.PayloadAttribute;
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
using Payload = Lucene.Net.Index.Payload;
-using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
namespace Lucene.Net.Analysis
{
- [TestFixture]
- public class TestAnalyzers : LuceneTestCase
+ [TestFixture]
+ public class TestAnalyzers:BaseTokenStreamTestCase
{
- public virtual void AssertAnalyzesTo(Analyzer a, System.String input, System.String[] output)
+ public TestAnalyzers(System.String name):base(name)
{
- TokenStream ts = a.TokenStream("dummy", new System.IO.StringReader(input));
- Token reusableToken = new Token();
- for (int i = 0; i < output.Length; i++)
- {
- Token nextToken = ts.Next(reusableToken);
- Assert.IsNotNull(nextToken);
- Assert.AreEqual(nextToken.Term(), output[i]);
- }
- Assert.IsNull(ts.Next(reusableToken));
- ts.Close();
}
- [Test]
+ [Test]
public virtual void TestSimple()
{
Analyzer a = new SimpleAnalyzer();
@@ -58,7 +50,7 @@
AssertAnalyzesTo(a, "\"QUOTED\" word", new System.String[]{"quoted", "word"});
}
- [Test]
+ [Test]
public virtual void TestNull()
{
Analyzer a = new WhitespaceAnalyzer();
@@ -72,7 +64,7 @@
AssertAnalyzesTo(a, "\"QUOTED\" word", new System.String[]{"\"QUOTED\"", "word"});
}
- [Test]
+ [Test]
public virtual void TestStop()
{
Analyzer a = new StopAnalyzer();
@@ -82,98 +74,92 @@
internal virtual void VerifyPayload(TokenStream ts)
{
- Token reusableToken = new Token();
+ PayloadAttribute payloadAtt = (PayloadAttribute) ts.GetAttribute(typeof(PayloadAttribute));
for (byte b = 1; ; b++)
{
- reusableToken.Clear();
- Token nextToken = ts.Next(reusableToken);
- if (nextToken == null)
+ bool hasNext = ts.IncrementToken();
+ if (!hasNext)
break;
- // System.out.println("id="+System.identityHashCode(nextToken) + " " + nextToken);
+ // System.out.println("id="+System.identityHashCode(nextToken) + " " + t);
// System.out.println("payload=" + (int)nextToken.getPayload().toByteArray()[0]);
- Assert.AreEqual(b, nextToken.GetPayload().ToByteArray()[0]);
+ Assert.AreEqual(b, payloadAtt.GetPayload().ToByteArray()[0]);
}
}
// Make sure old style next() calls result in a new copy of payloads
- [Test]
+ [Test]
public virtual void TestPayloadCopy()
{
System.String s = "how now brown cow";
TokenStream ts;
ts = new WhitespaceTokenizer(new System.IO.StringReader(s));
- ts = new BuffTokenFilter(ts);
ts = new PayloadSetter(ts);
VerifyPayload(ts);
ts = new WhitespaceTokenizer(new System.IO.StringReader(s));
ts = new PayloadSetter(ts);
- ts = new BuffTokenFilter(ts);
VerifyPayload(ts);
}
-
- // LUCENE-1150: Just a compile time test to ensure the
- // StandardAnalyzer constants remain publicly accessible
- public virtual void _TestStandardConstants()
- {
- int x = StandardTokenizer.ALPHANUM;
- x = StandardTokenizer.APOSTROPHE;
- x = StandardTokenizer.ACRONYM;
- x = StandardTokenizer.COMPANY;
- x = StandardTokenizer.EMAIL;
- x = StandardTokenizer.HOST;
- x = StandardTokenizer.NUM;
- x = StandardTokenizer.CJ;
- string[] y = StandardTokenizer.TOKEN_TYPES;
- }
- }
-
- class BuffTokenFilter : TokenFilter
- {
- internal System.Collections.IList lst;
- public BuffTokenFilter(TokenStream input) : base(input)
- {
+ // LUCENE-1150: Just a compile time test, to ensure the
+ // StandardAnalyzer constants remain publicly accessible
+ public virtual void _testStandardConstants()
+ {
+ int x = StandardTokenizer.ALPHANUM;
+ x = StandardTokenizer.APOSTROPHE;
+ x = StandardTokenizer.ACRONYM;
+ x = StandardTokenizer.COMPANY;
+ x = StandardTokenizer.EMAIL;
+ x = StandardTokenizer.HOST;
+ x = StandardTokenizer.NUM;
+ x = StandardTokenizer.CJ;
+ System.String[] y = StandardTokenizer.TOKEN_TYPES;
}
- public override Token Next(Token reusableToken)
+ private class MyStandardAnalyzer:StandardAnalyzer
{
- if (lst == null)
+ public override TokenStream TokenStream(System.String field, System.IO.TextReader reader)
{
- lst = new System.Collections.ArrayList();
- for (Token nextToken = input.Next(reusableToken); nextToken != null; nextToken = input.Next(reusableToken))
- {
- lst.Add(nextToken.Clone());
- }
+ return new WhitespaceAnalyzer().TokenStream(field, reader);
}
- object tempObject = lst[0];
- lst.RemoveAt(0);
- return lst.Count == 0 ? null : (Token) tempObject;
+ }
+
+ [Test]
+ public virtual void TestSubclassOverridingOnlyTokenStream()
+ {
+ Analyzer a = new MyStandardAnalyzer();
+ TokenStream ts = a.ReusableTokenStream("field", new System.IO.StringReader("the"));
+ // StandardAnalyzer will discard "the" (it's a
+ // stopword), by my subclass will not:
+ Assert.IsTrue(ts.IncrementToken());
+ Assert.IsFalse(ts.IncrementToken());
}
}
- class PayloadSetter : TokenFilter
+ class PayloadSetter:TokenFilter
{
private void InitBlock()
{
p = new Payload(data, 0, 1);
}
- public PayloadSetter(TokenStream input) : base(input)
+ internal PayloadAttribute payloadAtt;
+ public PayloadSetter(TokenStream input):base(input)
{
InitBlock();
+ payloadAtt = (PayloadAttribute) AddAttribute(typeof(PayloadAttribute));
}
internal byte[] data = new byte[1];
internal Payload p;
- public override Token Next(Token reusableToken)
+ public override bool IncrementToken()
{
- System.Diagnostics.Debug.Assert(reusableToken != null);
- Token nextToken = input.Next(reusableToken);
- if (nextToken == null) return null;
- nextToken.SetPayload(p); // reuse the payload / byte[]
+ bool hasNext = input.IncrementToken();
+ if (!hasNext)
+ return false;
+ payloadAtt.SetPayload(p); // reuse the payload / byte[]
data[0]++;
- return nextToken;
+ return true;
}
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCachingTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCachingTokenFilter.cs?rev=832486&r1=832485&r2=832486&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCachingTokenFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCachingTokenFilter.cs Tue Nov 3 18:06:27 2009
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,6 +19,8 @@
using NUnit.Framework;
+using OffsetAttribute = Lucene.Net.Analysis.Tokenattributes.OffsetAttribute;
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using TermVector = Lucene.Net.Documents.Field.TermVector;
@@ -28,14 +30,14 @@
using TermPositions = Lucene.Net.Index.TermPositions;
using Directory = Lucene.Net.Store.Directory;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
-using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
namespace Lucene.Net.Analysis
{
- [TestFixture]
- public class TestCachingTokenFilter : LuceneTestCase
+
+ [TestFixture]
+ public class TestCachingTokenFilter:BaseTokenStreamTestCase
{
- private class AnonymousClassTokenStream : TokenStream
+ private class AnonymousClassTokenStream:TokenStream
{
public AnonymousClassTokenStream(TestCachingTokenFilter enclosingInstance)
{
@@ -44,6 +46,8 @@
private void InitBlock(TestCachingTokenFilter enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
+ termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
+ offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));
}
private TestCachingTokenFilter enclosingInstance;
public TestCachingTokenFilter Enclosing_Instance
@@ -55,23 +59,26 @@
}
private int index = 0;
+ private TermAttribute termAtt;
+ private OffsetAttribute offsetAtt;
- public override Token Next(Token reusableToken)
+ public override bool IncrementToken()
{
- System.Diagnostics.Debug.Assert(reusableToken != null);
if (index == Enclosing_Instance.tokens.Length)
{
- return null;
+ return false;
}
else
{
- return reusableToken.Reinit(Enclosing_Instance.tokens[index++], 0, 0);
+ termAtt.SetTermBuffer(Enclosing_Instance.tokens[index++]);
+ offsetAtt.SetOffset(0, 0);
+ return true;
}
}
}
private System.String[] tokens = new System.String[]{"term1", "term2", "term3", "term2"};
- [NUnit.Framework.Test]
+ [Test]
public virtual void TestCaching()
{
Directory dir = new RAMDirectory();
@@ -84,9 +91,9 @@
doc.Add(new Field("preanalyzed", stream, TermVector.NO));
// 1) we consume all tokens twice before we add the doc to the index
- CheckTokens(stream);
+ checkTokens(stream);
stream.Reset();
- CheckTokens(stream);
+ checkTokens(stream);
// 2) now add the document to the index and verify if all tokens are indexed
// don't reset the stream here, the DocumentWriter should do that implicitly
@@ -113,17 +120,19 @@
// 3) reset stream and consume tokens again
stream.Reset();
- CheckTokens(stream);
+ checkTokens(stream);
}
- private void CheckTokens(TokenStream stream)
+ private void checkTokens(TokenStream stream)
{
int count = 0;
- Token reusableToken = new Token();
- for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
+
+ TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
+ Assert.IsNotNull(termAtt);
+ while (stream.IncrementToken())
{
Assert.IsTrue(count < tokens.Length);
- Assert.AreEqual(tokens[count], nextToken.Term());
+ Assert.AreEqual(tokens[count], termAtt.Term());
count++;
}
Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharArraySet.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCharArraySet.cs?rev=832486&r1=832485&r2=832486&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharArraySet.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharArraySet.cs Tue Nov 3 18:06:27 2009
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -24,21 +24,25 @@
namespace Lucene.Net.Analysis
{
- [TestFixture]
- public class TestCharArraySet : LuceneTestCase
+ [TestFixture]
+ public class TestCharArraySet:LuceneTestCase
{
- [Test]
+
+ internal static readonly System.String[] TEST_STOP_WORDS = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
+
+
+ [Test]
public virtual void TestRehash()
{
CharArraySet cas = new CharArraySet(0, true);
- for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.Length; i++)
- cas.Add(StopAnalyzer.ENGLISH_STOP_WORDS[i]);
- Assert.AreEqual(StopAnalyzer.ENGLISH_STOP_WORDS.Length, cas.Count);
- for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.Length; i++)
- Assert.IsTrue(cas.Contains(StopAnalyzer.ENGLISH_STOP_WORDS[i]));
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+ cas.Add(TEST_STOP_WORDS[i]);
+ Assert.AreEqual(TEST_STOP_WORDS.Length, cas.Count);
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+ Assert.IsTrue(cas.Contains(TEST_STOP_WORDS[i]));
}
-
- [Test]
+
+ [Test]
public virtual void TestNonZeroOffset()
{
System.String[] words = new System.String[]{"Hello", "World", "this", "is", "a", "test"};
@@ -47,6 +51,185 @@
for (int i = 0; i < words.Length; i++) { set_Renamed.Add(words[i]); }
Assert.IsTrue(set_Renamed.Contains(findme, 1, 4));
Assert.IsTrue(set_Renamed.Contains(new System.String(findme, 1, 4)));
+
+ // test unmodifiable
+ set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed);
+ Assert.IsTrue(set_Renamed.Contains(findme, 1, 4));
+ Assert.IsTrue(set_Renamed.Contains(new System.String(findme, 1, 4)));
+ }
+
+ [Test]
+ public virtual void TestObjectContains()
+ {
+ CharArraySet set_Renamed = new CharArraySet(10, true);
+ System.Int32 val = 1;
+ set_Renamed.Add((System.Object) val);
+ Assert.IsTrue(set_Renamed.Contains((System.Object) val));
+ Assert.IsTrue(set_Renamed.Contains((System.Object) 1));
+ // test unmodifiable
+ set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed);
+ Assert.IsTrue(set_Renamed.Contains((System.Object) val));
+ Assert.IsTrue(set_Renamed.Contains((System.Object) 1));
+ }
+
+ [Test]
+ public virtual void TestClear()
+ {
+ CharArraySet set_Renamed = new CharArraySet(10, true);
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++) { set_Renamed.Add(TEST_STOP_WORDS[i]); }
+ Assert.AreEqual(TEST_STOP_WORDS.Length, set_Renamed.Count, "Not all words added");
+ try
+ {
+ set_Renamed.Clear();
+ Assert.Fail("remove is not supported");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.AreEqual(TEST_STOP_WORDS.Length, set_Renamed.Count, "Not all words added");
+ }
+ }
+
+ [Test]
+ public virtual void TestModifyOnUnmodifiable()
+ {
+ CharArraySet set_Renamed = new CharArraySet(10, true);
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++) { set_Renamed.Add(TEST_STOP_WORDS[i]); }
+ int size = set_Renamed.Count;
+ set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed);
+ Assert.AreEqual(size, set_Renamed.Count, "Set size changed due to UnmodifiableSet call");
+ System.String NOT_IN_SET = "SirGallahad";
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String already exists in set");
+
+ try
+ {
+ set_Renamed.Add(NOT_IN_SET.ToCharArray());
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set");
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+
+ try
+ {
+ set_Renamed.Add(NOT_IN_SET);
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set");
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+
+ try
+ {
+ set_Renamed.Add(new System.Text.StringBuilder(NOT_IN_SET));
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set");
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+
+ try
+ {
+ set_Renamed.Clear();
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Changed unmodifiable set");
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+ try
+ {
+ set_Renamed.Add((System.Object) NOT_IN_SET);
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set");
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+ try
+ {
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+ {
+ if (set_Renamed.Contains(TEST_STOP_WORDS[i]))
+ set_Renamed.Remove(TEST_STOP_WORDS[i]); // {{Aroush-2.9}} this should throw
+ }
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+
+ try
+ {
+ for (int i = 0; i < NOT_IN_SET.Length; i++)
+ {
+ if (!set_Renamed.Contains(NOT_IN_SET[i]))
+ set_Renamed.Remove(NOT_IN_SET[i]); // {{Aroush-2.9}} this should throw
+ }
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.AreEqual(size, set_Renamed.Count, "Size of unmodifiable set has changed");
+ }
+
+ try
+ {
+ for (int i = 0; i < NOT_IN_SET.Length; i++)
+ {
+ if (!set_Renamed.Contains(NOT_IN_SET[i]))
+ set_Renamed.Add(NOT_IN_SET[i]); // {{Aroush-2.9}} this should throw
+ }
+ Assert.Fail("Modified unmodifiable set");
+ }
+ catch (System.NotSupportedException e)
+ {
+ // expected
+ Assert.IsFalse(set_Renamed.Contains(NOT_IN_SET), "Test String has been added to unmodifiable set");
+ }
+
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+ {
+ Assert.IsTrue(set_Renamed.Contains(TEST_STOP_WORDS[i]));
+ }
+ }
+
+ [Test]
+ public virtual void TestUnmodifiableSet()
+ {
+ CharArraySet set_Renamed = new CharArraySet(10, true);
+ for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
+ if (!set_Renamed.Contains(TEST_STOP_WORDS[i]))
+ set_Renamed.Add(TEST_STOP_WORDS[i]);
+ int size = set_Renamed.Count;
+ set_Renamed = CharArraySet.UnmodifiableSet(set_Renamed);
+ Assert.AreEqual(size, set_Renamed.Count, "Set size changed due to UnmodifiableSet call");
+
+ try
+ {
+ CharArraySet.UnmodifiableSet(null);
+ Assert.Fail("can not make null unmodifiable");
+ }
+ catch (System.NullReferenceException e)
+ {
+ // expected
+ }
}
}
}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCharFilter.cs?rev=832486&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharFilter.cs Tue Nov 3 18:06:27 2009
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NUnit.Framework;
+
+using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+namespace Lucene.Net.Analysis
+{
+
+ [TestFixture]
+ public class TestCharFilter:LuceneTestCase
+ {
+
+ [Test]
+ public virtual void TestCharFilter1()
+ {
+ CharStream cs = new CharFilter1(CharReader.Get(new System.IO.StringReader("")));
+ Assert.AreEqual(1, cs.CorrectOffset(0), "corrected offset is invalid");
+ }
+
+ [Test]
+ public virtual void TestCharFilter2()
+ {
+ CharStream cs = new CharFilter2(CharReader.Get(new System.IO.StringReader("")));
+ Assert.AreEqual(2, cs.CorrectOffset(0), "corrected offset is invalid");
+ }
+
+ [Test]
+ public virtual void TestCharFilter12()
+ {
+ CharStream cs = new CharFilter2(new CharFilter1(CharReader.Get(new System.IO.StringReader(""))));
+ Assert.AreEqual(3, cs.CorrectOffset(0), "corrected offset is invalid");
+ }
+
+ [Test]
+ public virtual void TestCharFilter11()
+ {
+ CharStream cs = new CharFilter1(new CharFilter1(CharReader.Get(new System.IO.StringReader(""))));
+ Assert.AreEqual(2, cs.CorrectOffset(0), "corrected offset is invalid");
+ }
+
+ internal class CharFilter1:CharFilter
+ {
+
+ protected internal CharFilter1(CharStream in_Renamed):base(in_Renamed)
+ {
+ }
+
+ public /*protected internal*/ override int Correct(int currentOff)
+ {
+ return currentOff + 1;
+ }
+ }
+
+ internal class CharFilter2:CharFilter
+ {
+
+ protected internal CharFilter2(CharStream in_Renamed):base(in_Renamed)
+ {
+ }
+
+ public /*protected internal*/ override int Correct(int currentOff)
+ {
+ return currentOff + 2;
+ }
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharacterCache.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestCharacterCache.cs?rev=832486&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharacterCache.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestCharacterCache.cs Tue Nov 3 18:06:27 2009
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NUnit.Framework;
+
+using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+namespace Lucene.Net.Analysis
+{
+ /// <summary> Testcase for {@link CharacterCache}</summary>
+ [TestFixture]
+ public class TestCharacterCache:LuceneTestCase
+ {
+
+ [Test]
+ public virtual void TestValueOf()
+ {
+ for (int i = 0; i < 256; i++)
+ {
+ System.Char valueOf = CharacterCache.ValueOf((char) i);
+ Assert.AreEqual((char) i, valueOf);
+ }
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestISOLatin1AccentFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestISOLatin1AccentFilter.cs?rev=832486&r1=832485&r2=832486&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestISOLatin1AccentFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestISOLatin1AccentFilter.cs Tue Nov 3 18:06:27 2009
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,96 +19,102 @@
using NUnit.Framework;
-using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
namespace Lucene.Net.Analysis
{
- [TestFixture]
- public class TestISOLatin1AccentFilter : LuceneTestCase
+ [TestFixture]
+ public class TestISOLatin1AccentFilter:BaseTokenStreamTestCase
{
- [Test]
+ [Test]
public virtual void TestU()
{
- TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés à LA CHAÃNE à à à à à Ã
à à à à à à à à à à IJ à à à à à à à à Šà à à à à à Ÿ à á â ã ä Ã¥ æ ç è é ê ë ì à î ï ij ð ñ ò ó ô õ ö ø Šà þ ù ú û ü ý ÿ ï¬ ï¬"));
+ TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("Des mot clés à LA CHAÃNE à �? à à à Ã
à à à à à à à �? à �? IJ �? à à à à à à à Šà à à à à �? Ÿ à á â ã ä Ã¥ æ ç è é ê ë ì à î ï ij ð ñ ò ó ô õ ö ø Šà þ ù ú û ü ý ÿ �? ï¬"));
ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
- Token reusableToken = new Token();
- Assert.AreEqual("Des", filter.Next(reusableToken).Term());
- Assert.AreEqual("mot", filter.Next(reusableToken).Term());
- Assert.AreEqual("cles", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("LA", filter.Next(reusableToken).Term());
- Assert.AreEqual("CHAINE", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("A", filter.Next(reusableToken).Term());
- Assert.AreEqual("AE", filter.Next(reusableToken).Term());
- Assert.AreEqual("C", filter.Next(reusableToken).Term());
- Assert.AreEqual("E", filter.Next(reusableToken).Term());
- Assert.AreEqual("E", filter.Next(reusableToken).Term());
- Assert.AreEqual("E", filter.Next(reusableToken).Term());
- Assert.AreEqual("E", filter.Next(reusableToken).Term());
- Assert.AreEqual("I", filter.Next(reusableToken).Term());
- Assert.AreEqual("I", filter.Next(reusableToken).Term());
- Assert.AreEqual("I", filter.Next(reusableToken).Term());
- Assert.AreEqual("I", filter.Next(reusableToken).Term());
- Assert.AreEqual("IJ", filter.Next(reusableToken).Term());
- Assert.AreEqual("D", filter.Next(reusableToken).Term());
- Assert.AreEqual("N", filter.Next(reusableToken).Term());
- Assert.AreEqual("O", filter.Next(reusableToken).Term());
- Assert.AreEqual("O", filter.Next(reusableToken).Term());
- Assert.AreEqual("O", filter.Next(reusableToken).Term());
- Assert.AreEqual("O", filter.Next(reusableToken).Term());
- Assert.AreEqual("O", filter.Next(reusableToken).Term());
- Assert.AreEqual("O", filter.Next(reusableToken).Term());
- Assert.AreEqual("OE", filter.Next(reusableToken).Term());
- Assert.AreEqual("TH", filter.Next(reusableToken).Term());
- Assert.AreEqual("U", filter.Next(reusableToken).Term());
- Assert.AreEqual("U", filter.Next(reusableToken).Term());
- Assert.AreEqual("U", filter.Next(reusableToken).Term());
- Assert.AreEqual("U", filter.Next(reusableToken).Term());
- Assert.AreEqual("Y", filter.Next(reusableToken).Term());
- Assert.AreEqual("Y", filter.Next(reusableToken).Term());
- Assert.AreEqual("a", filter.Next(reusableToken).Term());
- Assert.AreEqual("a", filter.Next(reusableToken).Term());
- Assert.AreEqual("a", filter.Next(reusableToken).Term());
- Assert.AreEqual("a", filter.Next(reusableToken).Term());
- Assert.AreEqual("a", filter.Next(reusableToken).Term());
- Assert.AreEqual("a", filter.Next(reusableToken).Term());
- Assert.AreEqual("ae", filter.Next(reusableToken).Term());
- Assert.AreEqual("c", filter.Next(reusableToken).Term());
- Assert.AreEqual("e", filter.Next(reusableToken).Term());
- Assert.AreEqual("e", filter.Next(reusableToken).Term());
- Assert.AreEqual("e", filter.Next(reusableToken).Term());
- Assert.AreEqual("e", filter.Next(reusableToken).Term());
- Assert.AreEqual("i", filter.Next(reusableToken).Term());
- Assert.AreEqual("i", filter.Next(reusableToken).Term());
- Assert.AreEqual("i", filter.Next(reusableToken).Term());
- Assert.AreEqual("i", filter.Next(reusableToken).Term());
- Assert.AreEqual("ij", filter.Next(reusableToken).Term());
- Assert.AreEqual("d", filter.Next(reusableToken).Term());
- Assert.AreEqual("n", filter.Next(reusableToken).Term());
- Assert.AreEqual("o", filter.Next(reusableToken).Term());
- Assert.AreEqual("o", filter.Next(reusableToken).Term());
- Assert.AreEqual("o", filter.Next(reusableToken).Term());
- Assert.AreEqual("o", filter.Next(reusableToken).Term());
- Assert.AreEqual("o", filter.Next(reusableToken).Term());
- Assert.AreEqual("o", filter.Next(reusableToken).Term());
- Assert.AreEqual("oe", filter.Next(reusableToken).Term());
- Assert.AreEqual("ss", filter.Next(reusableToken).Term());
- Assert.AreEqual("th", filter.Next(reusableToken).Term());
- Assert.AreEqual("u", filter.Next(reusableToken).Term());
- Assert.AreEqual("u", filter.Next(reusableToken).Term());
- Assert.AreEqual("u", filter.Next(reusableToken).Term());
- Assert.AreEqual("u", filter.Next(reusableToken).Term());
- Assert.AreEqual("y", filter.Next(reusableToken).Term());
- Assert.AreEqual("y", filter.Next(reusableToken).Term());
- Assert.AreEqual("fi", filter.Next(reusableToken).Term());
- Assert.AreEqual("fl", filter.Next(reusableToken).Term());
- Assert.IsNull(filter.Next(reusableToken));
+ TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute));
+ AssertTermEquals("Des", filter, termAtt);
+ AssertTermEquals("mot", filter, termAtt);
+ AssertTermEquals("cles", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("LA", filter, termAtt);
+ AssertTermEquals("CHAINE", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("A", filter, termAtt);
+ AssertTermEquals("AE", filter, termAtt);
+ AssertTermEquals("C", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("E", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("I", filter, termAtt);
+ AssertTermEquals("IJ", filter, termAtt);
+ AssertTermEquals("D", filter, termAtt);
+ AssertTermEquals("N", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("O", filter, termAtt);
+ AssertTermEquals("OE", filter, termAtt);
+ AssertTermEquals("TH", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("U", filter, termAtt);
+ AssertTermEquals("Y", filter, termAtt);
+ AssertTermEquals("Y", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("a", filter, termAtt);
+ AssertTermEquals("ae", filter, termAtt);
+ AssertTermEquals("c", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("e", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("i", filter, termAtt);
+ AssertTermEquals("ij", filter, termAtt);
+ AssertTermEquals("d", filter, termAtt);
+ AssertTermEquals("n", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("o", filter, termAtt);
+ AssertTermEquals("oe", filter, termAtt);
+ AssertTermEquals("ss", filter, termAtt);
+ AssertTermEquals("th", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("u", filter, termAtt);
+ AssertTermEquals("y", filter, termAtt);
+ AssertTermEquals("y", filter, termAtt);
+ AssertTermEquals("fi", filter, termAtt);
+ AssertTermEquals("fl", filter, termAtt);
+ Assert.IsFalse(filter.IncrementToken());
+ }
+
+ internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
+ {
+ Assert.IsTrue(stream.IncrementToken());
+ Assert.AreEqual(expected, termAtt.Term());
}
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestKeywordAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestKeywordAnalyzer.cs?rev=832486&r1=832485&r2=832486&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestKeywordAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestKeywordAnalyzer.cs Tue Nov 3 18:06:27 2009
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,6 +19,7 @@
using NUnit.Framework;
+using OffsetAttribute = Lucene.Net.Analysis.Tokenattributes.OffsetAttribute;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexReader = Lucene.Net.Index.IndexReader;
@@ -27,28 +28,27 @@
using TermDocs = Lucene.Net.Index.TermDocs;
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
-using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using Query = Lucene.Net.Search.Query;
-using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+using ScoreDoc = Lucene.Net.Search.ScoreDoc;
namespace Lucene.Net.Analysis
{
- [TestFixture]
- public class TestKeywordAnalyzer : LuceneTestCase
+ [TestFixture]
+ public class TestKeywordAnalyzer:BaseTokenStreamTestCase
{
private RAMDirectory directory;
private IndexSearcher searcher;
[SetUp]
- public override void SetUp()
+ public override void SetUp()
{
base.SetUp();
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
-//writer.SetInfoStream(System.Console.Out);
+
Document doc = new Document();
doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.Add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED));
@@ -58,32 +58,14 @@
searcher = new IndexSearcher(directory);
}
-
- //[Test]
- //public void TestSameThreadConsecutive()
- //{
- // TestMultipleDocument();
- // TestPerFieldAnalyzer();
- //}
-
- //[Test]
- //public void TestDistinctThreadConsecutive()
- //{
- // SupportClass.ThreadClass thread1 = new SupportClass.ThreadClass(new System.Threading.ThreadStart(TestMultipleDocument));
- // thread1.Start();
- // System.Threading.Thread.CurrentThread.Join();
- // SupportClass.ThreadClass thread2 = new SupportClass.ThreadClass(new System.Threading.ThreadStart(TestPerFieldAnalyzer));
- // thread2.Start();
- // System.Threading.Thread.CurrentThread.Join();
- //}
-
+
[Test]
public virtual void TestPerFieldAnalyzer()
{
- PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
+ PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
analyzer.AddAnalyzer("partnum", new KeywordAnalyzer());
-
- Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("description", analyzer);
+
+ QueryParser queryParser = new QueryParser("description", analyzer);
Query query = queryParser.Parse("partnum:Q36 AND SPACE");
ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
@@ -91,8 +73,8 @@
Assert.AreEqual(1, hits.Length, "doc found!");
}
- [Test]
- public virtual void TestMultipleDocument()
+ [Test]
+ public virtual void TestMutipleDocument()
{
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
@@ -104,14 +86,22 @@
writer.AddDocument(doc);
writer.Close();
- IndexReader reader = IndexReader.Open(dir);
- // following is the line whose inclusion causes TestPerFieldAnalyzer to fail:
- TermDocs td = reader.TermDocs(new Term("partnum", "Q36"));
- Assert.IsTrue(td.Next());
- td = reader.TermDocs(new Term("partnum", "Q37"));
- Assert.IsTrue(td.Next());
-//this fixes TestPerFieldAnalyzer:
-//((Lucene.Net.Index.SegmentReader)reader).foo();
+ IndexReader reader = IndexReader.Open(dir);
+ TermDocs td = reader.TermDocs(new Term("partnum", "Q36"));
+ Assert.IsTrue(td.Next());
+ td = reader.TermDocs(new Term("partnum", "Q37"));
+ Assert.IsTrue(td.Next());
+ }
+
+ // LUCENE-1441
+ [Test]
+ public virtual void TestOffsets()
+ {
+ TokenStream stream = new KeywordAnalyzer().TokenStream("field", new System.IO.StringReader("abcd"));
+ OffsetAttribute offsetAtt = (OffsetAttribute) stream.AddAttribute(typeof(OffsetAttribute));
+ Assert.IsTrue(stream.IncrementToken());
+ Assert.AreEqual(0, offsetAtt.StartOffset());
+ Assert.AreEqual(4, offsetAtt.EndOffset());
}
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestLengthFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestLengthFilter.cs?rev=832486&r1=832485&r2=832486&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestLengthFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestLengthFilter.cs Tue Nov 3 18:06:27 2009
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -19,24 +19,29 @@
using NUnit.Framework;
-using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
namespace Lucene.Net.Analysis
{
- [TestFixture]
- public class TestLengthFilter : LuceneTestCase
+ [TestFixture]
+ public class TestLengthFilter:BaseTokenStreamTestCase
{
- [Test]
+
+ [Test]
public virtual void TestFilter()
{
TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("short toolong evenmuchlongertext a ab toolong foo"));
LengthFilter filter = new LengthFilter(stream, 2, 6);
- Token reusableToken = new Token();
- Assert.AreEqual("short", filter.Next(reusableToken).Term());
- Assert.AreEqual("ab", filter.Next(reusableToken).Term());
- Assert.AreEqual("foo", filter.Next(reusableToken).Term());
- Assert.IsNull(filter.Next(reusableToken));
+ TermAttribute termAtt = (TermAttribute) filter.GetAttribute(typeof(TermAttribute));
+
+ Assert.IsTrue(filter.IncrementToken());
+ Assert.AreEqual("short", termAtt.Term());
+ Assert.IsTrue(filter.IncrementToken());
+ Assert.AreEqual("ab", termAtt.Term());
+ Assert.IsTrue(filter.IncrementToken());
+ Assert.AreEqual("foo", termAtt.Term());
+ Assert.IsFalse(filter.IncrementToken());
}
}
}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestMappingCharFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestMappingCharFilter.cs?rev=832486&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestMappingCharFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestMappingCharFilter.cs Tue Nov 3 18:06:27 2009
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis
+{
+
+ [TestFixture]
+ public class TestMappingCharFilter:BaseTokenStreamTestCase
+ {
+
+ internal NormalizeCharMap normMap;
+
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ normMap = new NormalizeCharMap();
+
+ normMap.Add("aa", "a");
+ normMap.Add("bbb", "b");
+ normMap.Add("cccc", "cc");
+
+ normMap.Add("h", "i");
+ normMap.Add("j", "jj");
+ normMap.Add("k", "kkk");
+ normMap.Add("ll", "llll");
+
+ normMap.Add("empty", "");
+ }
+
+ [Test]
+ public virtual void TestReaderReset()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("x")));
+ char[] buf = new char[10];
+ int len = cs.Read(buf, 0, 10);
+ Assert.AreEqual(1, len);
+ Assert.AreEqual('x', buf[0]);
+ len = cs.Read(buf, 0, 10);
+ Assert.AreEqual(- 1, len);
+
+ // rewind
+ cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("x")));
+ len = cs.Read(buf, 0, 10);
+ Assert.AreEqual(1, len);
+ Assert.AreEqual('x', buf[0]);
+ }
+
+ [Test]
+ public virtual void TestNothingChange()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("x")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"x"}, new int[]{0}, new int[]{1});
+ }
+
+ [Test]
+ public virtual void Test1to1()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("h")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"i"}, new int[]{0}, new int[]{1});
+ }
+
+ [Test]
+ public virtual void Test1to2()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("j")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"jj"}, new int[]{0}, new int[]{1});
+ }
+
+ [Test]
+ public virtual void Test1to3()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("k")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"kkk"}, new int[]{0}, new int[]{1});
+ }
+
+ [Test]
+ public virtual void Test2to4()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("ll")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"llll"}, new int[]{0}, new int[]{2});
+ }
+
+ [Test]
+ public virtual void Test2to1()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("aa")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"a"}, new int[]{0}, new int[]{2});
+ }
+
+ [Test]
+ public virtual void Test3to1()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("bbb")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"b"}, new int[]{0}, new int[]{3});
+ }
+
+ [Test]
+ public virtual void Test4to2()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("cccc")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"cc"}, new int[]{0}, new int[]{4});
+ }
+
+ [Test]
+ public virtual void Test5to0()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("empty")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[0]);
+ }
+
+ //
+ // 1111111111222
+ // 01234567890123456789012
+ //(in) h i j k ll cccc bbb aa
+ //
+ // 1111111111222
+ // 01234567890123456789012
+ //(out) i i jj kkk llll cc b a
+ //
+ // h, 0, 1 => i, 0, 1
+ // i, 2, 3 => i, 2, 3
+ // j, 4, 5 => jj, 4, 5
+ // k, 6, 7 => kkk, 6, 7
+ // ll, 8,10 => llll, 8,10
+ // cccc,11,15 => cc,11,15
+ // bbb,16,19 => b,16,19
+ // aa,20,22 => a,20,22
+ //
+ [Test]
+ public virtual void TestTokenStream()
+ {
+ CharStream cs = new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("h i j k ll cccc bbb aa")));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"i", "i", "jj", "kkk", "llll", "cc", "b", "a"}, new int[]{0, 2, 4, 6, 8, 11, 16, 20}, new int[]{1, 3, 5, 7, 10, 15, 19, 22});
+ }
+
+ //
+ //
+ // 0123456789
+ //(in) aaaa ll h
+ //(out-1) aa llll i
+ //(out-2) a llllllll i
+ //
+ // aaaa,0,4 => a,0,4
+ // ll,5,7 => llllllll,5,7
+ // h,8,9 => i,8,9
+ [Test]
+ public virtual void TestChained()
+ {
+ CharStream cs = new MappingCharFilter(normMap, new MappingCharFilter(normMap, CharReader.Get(new System.IO.StringReader("aaaa ll h"))));
+ TokenStream ts = new WhitespaceTokenizer(cs);
+ AssertTokenStreamContents(ts, new System.String[]{"a", "llllllll", "i"}, new int[]{0, 5, 8}, new int[]{4, 7, 9});
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Test/Analysis/TestNumericTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/Analysis/TestNumericTokenStream.cs?rev=832486&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/Analysis/TestNumericTokenStream.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Test/Analysis/TestNumericTokenStream.cs Tue Nov 3 18:06:27 2009
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using NUnit.Framework;
+
+using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
+using TypeAttribute = Lucene.Net.Analysis.Tokenattributes.TypeAttribute;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// <summary>
+ /// Tests for <c>NumericTokenStream</c>: verifies that long/int values are
+ /// emitted as one prefix-coded term per precision step, and that using an
+ /// uninitialized stream fails fast.
+ /// </summary>
+ [TestFixture]
+ public class TestNumericTokenStream:BaseTokenStreamTestCase
+ {
+
+ // arbitrary sample values to encode
+ internal const long lvalue = 4573245871874382L;
+ internal const int ivalue = 123456;
+
+ [Test]
+ public virtual void TestLongStream()
+ {
+ NumericTokenStream stream = new NumericTokenStream().SetLongValue(lvalue);
+ // use getAttribute to test if attributes really exist, if not an IAE will be thrown
+ TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
+ TypeAttribute typeAtt = (TypeAttribute) stream.GetAttribute(typeof(TypeAttribute));
+ // one token per precision step over the 64-bit value; the first token
+ // (shift == 0) carries full precision, the rest are lower-precision prefixes
+ for (int shift = 0; shift < 64; shift += NumericUtils.PRECISION_STEP_DEFAULT)
+ {
+ Assert.IsTrue(stream.IncrementToken(), "New token is available");
+ Assert.AreEqual(NumericUtils.LongToPrefixCoded(lvalue, shift), termAtt.Term(), "Term is correctly encoded");
+ Assert.AreEqual((shift == 0)?NumericTokenStream.TOKEN_TYPE_FULL_PREC:NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type(), "Type correct");
+ }
+ Assert.IsFalse(stream.IncrementToken(), "No more tokens available");
+ }
+
+ [Test]
+ public virtual void TestIntStream()
+ {
+ NumericTokenStream stream = new NumericTokenStream().SetIntValue(ivalue);
+ // use getAttribute to test if attributes really exist, if not an IAE will be thrown
+ TermAttribute termAtt = (TermAttribute) stream.GetAttribute(typeof(TermAttribute));
+ TypeAttribute typeAtt = (TypeAttribute) stream.GetAttribute(typeof(TypeAttribute));
+ // same as TestLongStream, but over the 32 bits of an int value
+ for (int shift = 0; shift < 32; shift += NumericUtils.PRECISION_STEP_DEFAULT)
+ {
+ Assert.IsTrue(stream.IncrementToken(), "New token is available");
+ Assert.AreEqual(NumericUtils.IntToPrefixCoded(ivalue, shift), termAtt.Term(), "Term is correctly encoded");
+ Assert.AreEqual((shift == 0)?NumericTokenStream.TOKEN_TYPE_FULL_PREC:NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type(), "Type correct");
+ }
+ Assert.IsFalse(stream.IncrementToken(), "No more tokens available");
+ }
+
+ [Test]
+ public virtual void TestNotInitialized()
+ {
+ // a stream with no value set must reject both Reset() and IncrementToken()
+ NumericTokenStream stream = new NumericTokenStream();
+
+ try
+ {
+ stream.Reset();
+ Assert.Fail("reset() should not succeed.");
+ }
+ catch (System.SystemException)
+ {
+ // expected: no value has been set on the stream
+ }
+
+ try
+ {
+ stream.IncrementToken();
+ Assert.Fail("incrementToken() should not succeed.");
+ }
+ catch (System.SystemException)
+ {
+ // expected: no value has been set on the stream
+ }
+ }
+ }
+}
\ No newline at end of file