You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/12/10 19:39:14 UTC
[25/27] lucenenet git commit: adding converted analysis common tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs
new file mode 100644
index 0000000..81bdd31
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs
@@ -0,0 +1,570 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using TestUtil = org.apache.lucene.util.TestUtil;
+
+ public class HTMLStripCharFilterTest : BaseTokenStreamTestCase
+ {
+
+        /// <summary>
+        /// Builds the analyzer shared by the analysis-based tests in this class: a fresh
+        /// <c>AnalyzerAnonymousInnerClassHelper</c> instance.
+        /// </summary>
+        private static Analyzer newTestAnalyzer()
+        {
+            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+            return analyzer;
+        }
+
+        /// <summary>
+        /// Analyzer that wraps its input reader in an <c>HTMLStripCharFilter</c> and tokenizes
+        /// with a whitespace <c>MockTokenizer</c>.
+        /// </summary>
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            // Wrap the raw reader so the tokenizer only ever sees filtered text.
+            protected internal override Reader initReader(string fieldName, Reader reader)
+            {
+                return new HTMLStripCharFilter(reader);
+            }
+
+            // The same tokenizer instance serves as both source and sink of the components.
+            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+            {
+                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(source, source);
+            }
+        }
+
+ //this is some text here is a link and another link . This is an entity: & plus a <. Here is an &
+ //
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws Exception
+        /// <summary>
+        /// Strips a small document containing a div, two links and a trailing comment, and
+        /// compares the result with the expected text.
+        /// </summary>
+        public virtual void test()
+        {
+            string markup = "<div class=\"foo\">this is some text</div> here is a <a href=\"#bar\">link</a> and "
+                + "another <a href=\"http://lucene.apache.org/\">link</a>. "
+                + "This is an entity: & plus a <. Here is an &. <!-- is a comment -->";
+            string expected = "\nthis is some text\n here is a link and "
+                + "another link. "
+                + "This is an entity: & plus a <. Here is an &. ";
+            assertHTMLStripsTo(markup, expected, null);
+        }
+
+ //Some sanity checks, but not a full-fledged check
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHTML() throws Exception
+        /// <summary>
+        /// Sanity check (not exhaustive): runs the "htmlStripReaderTest.html" resource through
+        /// <c>HTMLStripCharFilter</c> and inspects the stripped text.
+        /// </summary>
+        public virtual void testHTML()
+        {
+            StringBuilder builder = new StringBuilder();
+            // Dispose the resource stream once the filter has been fully drained.
+            using (System.IO.Stream stream = this.GetType().getResourceAsStream("htmlStripReaderTest.html"))
+            {
+                HTMLStripCharFilter reader = new HTMLStripCharFilter(new System.IO.StreamReader(stream, Encoding.UTF8));
+                int ch;
+                while ((ch = reader.read()) != -1)
+                {
+                    builder.Append((char)ch);
+                }
+            }
+            string str = builder.ToString();
+            string trimmed = str.Trim();
+            assertTrue("Entity not properly escaped", str.IndexOf("<", StringComparison.Ordinal) == -1); //there is one > in the text
+            assertTrue("Forrest should have been stripped out", str.IndexOf("forrest", StringComparison.Ordinal) == -1 && str.IndexOf("Forrest", StringComparison.Ordinal) == -1);
+            assertTrue("File should start with 'Welcome to Solr' after trimming", trimmed.StartsWith("Welcome to Solr", StringComparison.Ordinal));
+
+            // The message previously said "start with" although the check is on the end of the text.
+            assertTrue("File should end with 'Foundation.' after trimming", trimmed.EndsWith("Foundation.", StringComparison.Ordinal));
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMSWord14GeneratedHTML() throws Exception
+        /// <summary>
+        /// Strips the "MS-Word 14 generated.htm" resource and expects the trimmed output to be
+        /// exactly "This is a test".
+        /// </summary>
+        public virtual void testMSWord14GeneratedHTML()
+        {
+            string gold = "This is a test";
+            StringBuilder builder = new StringBuilder();
+            // Dispose the resource stream once the filter has been fully drained.
+            using (System.IO.Stream stream = this.GetType().getResourceAsStream("MS-Word 14 generated.htm"))
+            {
+                HTMLStripCharFilter reader = new HTMLStripCharFilter(new System.IO.StreamReader(stream, Encoding.UTF8));
+                int ch;
+                while ((ch = reader.read()) != -1)
+                {
+                    builder.Append((char)ch);
+                }
+            }
+            // Compare trimmed output to gold
+            string actual = builder.ToString().Trim();
+            assertEquals("'" + actual + "' is not equal to '" + gold + "'", gold, actual);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testGamma() throws Exception
+        /// <summary>
+        /// Checks that the capital gamma input comes out as U+0393 when "reserved" is an escaped tag.
+        /// </summary>
+        public virtual void testGamma()
+        {
+            // NOTE(review): the input literal looks like it lost its entity encoding when this
+            // page was rendered (it is already the decoded character) - confirm against the repository.
+            // The Java diamond form new HashSet<>(Arrays.asList(..)) is not valid C#.
+            ISet<string> escapedTags = new HashSet<string> { "reserved" };
+            assertHTMLStripsTo("Γ", "\u0393", escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEntities() throws Exception
+        /// <summary>
+        /// Checks a mix of entity-derived characters against the expected decoded text, with
+        /// "reserved" as an escaped tag.
+        /// </summary>
+        public virtual void testEntities()
+        {
+            // NOTE(review): the input literal appears to have been entity-decoded by the page
+            // rendering - confirm the original text against the repository.
+            string test = " <foo> Übermensch = Γ bar Γ";
+            string gold = " <foo> \u00DCbermensch = \u0393 bar \u0393";
+            // The Java diamond form new HashSet<>(Arrays.asList(..)) is not valid C#.
+            ISet<string> escapedTags = new HashSet<string> { "reserved" };
+            assertHTMLStripsTo(test, gold, escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMoreEntities() throws Exception
+        /// <summary>
+        /// Checks further entity-derived characters against the expected text, with "reserved"
+        /// as an escaped tag.
+        /// </summary>
+        public virtual void testMoreEntities()
+        {
+            // NOTE(review): the input literal appears to have been entity-decoded by the page
+            // rendering - confirm the original text against the repository.
+            string test = " <junk/> ! @ and ’";
+            string gold = " <junk/> ! @ and ’";
+            // The Java diamond form new HashSet<>(Arrays.asList(..)) is not valid C#.
+            ISet<string> escapedTags = new HashSet<string> { "reserved" };
+            assertHTMLStripsTo(test, gold, escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReserved() throws Exception
+        /// <summary>
+        /// Filters a string with "reserved" as an escaped tag, then checks the positions at which
+        /// "reserved" occurs in the output and that "other" does not occur at all.
+        /// </summary>
+        public virtual void testReserved()
+        {
+            const string tagName = "reserved";
+            string test = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
+            ISet<string> set = new HashSet<string> { tagName };
+            Reader reader = new HTMLStripCharFilter(new StringReader(test), set);
+
+            StringBuilder stripped = new StringBuilder();
+            for (int ch = reader.read(); ch != -1; ch = reader.read())
+            {
+                stripped.Append((char)ch);
+            }
+            string result = stripped.ToString();
+
+            // Each lookup happens right before its assertion so an earlier failure is reported first.
+            int first = result.IndexOf(tagName, StringComparison.Ordinal);
+            assertTrue("Escaped tag not preserved: " + first, first == 9);
+            int second = result.IndexOf(tagName, 15, StringComparison.Ordinal);
+            assertTrue("Escaped tag not preserved: " + second, second == 38);
+            int third = result.IndexOf(tagName, 41, StringComparison.Ordinal);
+            assertTrue("Escaped tag not preserved: " + third, third == 54);
+            assertTrue("Other tag should be removed", result.IndexOf("other", StringComparison.Ordinal) == -1);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMalformedHTML() throws Exception
+ // Table-driven check over malformed HTML snippets. testGold is a flat array of pairs:
+ // each even index holds an input and the following odd index holds the text that
+ // assertHTMLStripsTo is expected to produce for it (no escaped tags are used).
+ public virtual void testMalformedHTML()
+ {
+ string[] testGold = new string[] {"a <a hr<ef=aa<a>> </close</a>", "a <a hr<ef=aa> </close", "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\" class=lu style=\"font-size: 9px\" target=dmoz>Submit a Site</a>", "Submit a Site", "<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand id=e8 class=expanded rel=http://www.csmonitor.com/>Christian Science", "Christian Science", "<link rel=\"alternate\" type=\"application/rss+xml\" title=\"San Francisco \" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\" />", "\n", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\"http://ucblibraries.colorado.edu/how/index.htm\"class=\"pageN
avAreaText\">", "", "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\" type=\"application/opensearchdescription+xml\" href=\"http://21sta.com/blog/inc/opensearch.php\" />", "\n", "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?", "?", "<a href='/modern-furniture' ' id='21txt' class='offtab' onMouseout=\"this.className='offtab'; return true;\" onMouseover=\"this.className='ontab'; return true;\">", "", "<a href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size: 275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>", "", "The <a href=<a href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a> practice software</a>", "The <a href=medical\">http://www.advancedmd.com>medical practice software", "<a href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008 Clip of the Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week 29...", "Levi.com/BMX 2008 Clip of the Week 29...", "<a href=\"
printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer Friendly", "Printer Friendly", "<a href=#\" ondragstart=\"return false\" onclick=\"window.external.AddFavorite('http://www.amazingtextures.com', 'Amazing Textures');return false\" onmouseover=\"window.status='Add to Favorites';return true\">Add to Favorites", "Add to Favorites", "<a href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At", "At", "E-mail: <a href=\"\"mailto:XXXXXX@example.com\" \">XXXXXX@example.com </a>", "E-mail: XXXXXX@example.com ", "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\" href=\"http://www.america.gov/persian\" alt=\"\" name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>", "\nA'13?\n", "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert \"Geese\" Ausby</a></li>", "\nHubert \"Geese\" Ausby\n", "<href=\"http://anbportal.com/mms/login.asp\">", "\n", "<a href=\"", "<a href=\"", "<a href=\">", "", "<a rel=\"nofollow\" href=\"http://anissanina31.skyrock.com/18950394
93-Hi-tout-le-monde.html\" title=\" Hi, tout le monde !>#</a>", "#", "<a href=\"http://annunciharleydavidsonusate.myblog.it/\" title=\"Annunci Moto e Accessori Harley Davidson\" target=\"_blank\"><img src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\" /></a>", "", "<a href=\"video/addvideo&v=120838887181\" onClick=\"return confirm('Are you sure you want add this video to your profile? If it exists some video in your profile will be overlapped by this video!!')\" \" onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">", "", "<a href=#Services & Support>", "", "<input type=\"image\" src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\" name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + document.getElementById('sel').value ; var dt ='&dt=' + document.getElementById('dt').value; var searchKeyword = document.getElementById('q').value ; searchKeyword = searchKeyword.replace(/\\s
/g,''); if (searchKeyword.length < 3){alert('Nothing to search. Search keyword should contain atleast 3 chars.'); return false; } var al='&al=' + document.getElementById('advancedlink').style.display ; document.location.href='http://apologyindex.com/search.aspx?q=' + document.getElementById('q').value + incom + dt + al;\" />", "", "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\" height=\"22\" hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){ window.sidebar.addPanel(document.title,location.href,''); }else if(window.external){ window.external.AddFavorite(location.href,document.title); }else if(window.opera&&window.print) { return true; }\">", "", "<area shape=\"rect\" coords=\"12,153,115,305\" href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's Norwegian Roots in Rogaland\">", "\n", "<a rel=\"nofollow\" href=\"http://arth26.skyrock.com/660188240-bonzai.html\" title=\"bonza>#", "#", "<a href= >", "", "<ahref
=http:..", "<ahref=http:..", "<ahref=http:..>", "\n", "<ahref=\"http://aseigo.bddf.ca/cms/1025\">A", "\nA", "<a href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">", "", "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\" Rackmounts\">", "", "<a href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html title=savage model 110 manual rel=dofollow>", "", "<a class=\"at\" name=\"Lamborghini href=\"http://lamborghini.coolbegin.com\">Lamborghini /a>", "Lamborghini /a>", "<A href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on eye black for championship game' TARGET=_blank>", "", "<a href=/myspace !style='color:#993333'>", "", "<meta name=3DProgId content=3DExcel.Sheet>", "\n", "<l
ink id=3D\"shLink\" href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">", "\n", "<td bgcolor=3D\"#FFFFFF\" nowrap>", "\n", "<a href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\">\"predicciones mundiales 2009\"</a>", "\"predicciones mundiales 2009\"", "<a class=\"comment-link\" href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>", "", "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\" videos\">Bishop\"</a>", "Bishop\"", "<a href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\">BHAA Eircom 2 & 5 miles CC combined start</a>", "BHAA Eircom 2 & 5 miles CC combined start", "<a href=\"http://people.tribe.net/wolfmana\" onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\" title=\"Mana\">", "", "<a href=\"http://blog.edu-cyberpg.com/ct.ashx?id=
6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\" eudora=\"autourl\">", "", "<input type=\"text\" value=\"<search here>\">", "<input type=\"text\" value=\"\n\">", "<input type=\"text\" value=\"<search here\">", "<input type=\"text\" value=\"\n", "<input type=\"text\" value=\"search here>\">", "\">", "<input type=\"text\" value=\"<search here>\" onFocus=\"this.value='<search here>'\">", "", "<![if ! IE]>\n<link href=\"http://i.deviantart.com/icons/favicon.png\" rel=\"shortcut icon\"/>\n<![endif]>", "\n\n\n", "<![if supportMisalignedColumns]>\n<tr height=0 style='display:none'>\n<td width=64 style='width:48pt'></td>\n</tr>\n<![endif]>", "\n\n\n\n\n\n\n\n"};
+ // Step through the table two entries at a time: [i] is the input, [i + 1] the expected output.
+ for (int i = 0 ; i < testGold.Length ; i += 2)
+ {
+ assertHTMLStripsTo(testGold[i], testGold[i + 1], null);
+ }
+ }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBufferOverflow() throws Exception
+        /// <summary>
+        /// Feeds inputs longer than <c>HTMLStripCharFilter.InitialBufferSize</c> through the filter:
+        /// plain text after "&lt;?&gt;", an oversized comment, an oversized processing instruction
+        /// and an oversized tag.
+        /// </summary>
+        public virtual void testBufferOverflow()
+        {
+            StringBuilder input = new StringBuilder(HTMLStripCharFilter.InitialBufferSize + 50);
+
+            // Case 1: text following "<?>" is expected to come back unchanged.
+            input.Append("ah<?> ??????");
+            appendChars(input, HTMLStripCharFilter.InitialBufferSize + 500);
+            Reader reader = new HTMLStripCharFilter(new System.IO.StreamReader(new StringReader(input.ToString()))); //force the use of BufferedReader
+            assertHTMLStripsTo(reader, input.ToString(), null);
+
+            // Case 2: an oversized comment (comments have two lookaheads) followed by "foo".
+            input.Clear();
+            input.Append("<!--");
+            appendChars(input, 3 * HTMLStripCharFilter.InitialBufferSize + 500);
+            input.Append("-->foo");
+            assertHTMLStripsTo(input.ToString(), "foo", null);
+
+            // Case 3: an oversized processing instruction is removed entirely.
+            input.Clear();
+            input.Append("<?");
+            appendChars(input, HTMLStripCharFilter.InitialBufferSize + 500);
+            input.Append("?>");
+            assertHTMLStripsTo(input.ToString(), "", null);
+
+            // Case 4: an oversized self-closing tag is removed entirely.
+            input.Clear();
+            input.Append("<b ");
+            appendChars(input, HTMLStripCharFilter.InitialBufferSize + 500);
+            input.Append("/>");
+            assertHTMLStripsTo(input.ToString(), "", null);
+        }
+
+        /// <summary>
+        /// Appends <c>numChars / 2</c> copies of "a " to <paramref name="testBuilder"/>; callers use
+        /// this to push input past the filter's mark read-ahead limit.
+        /// </summary>
+        private void appendChars(StringBuilder testBuilder, int numChars)
+        {
+            for (int remaining = numChars / 2; remaining > 0; remaining--)
+            {
+                testBuilder.Append("a ");
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testComment() throws Exception
+        /// <summary>
+        /// Exercises comment handling: a three-dash comment, a "&lt;! --" sequence that is not a
+        /// comment, and an unterminated comment of random length.
+        /// </summary>
+        public virtual void testComment()
+        {
+            assertHTMLStripsTo("<!--- three dashes, still a valid comment ---> ", " ", null);
+
+            // should not be recognized as a comment
+            assertHTMLStripsTo("<! -- blah > ", " ", null);
+
+            // An opened comment that never closes is expected to yield no output.
+            StringBuilder unterminated = new StringBuilder("<!--");
+            appendChars(unterminated, TestUtil.Next(random(), 0, 1000));
+            assertHTMLStripsTo(unterminated.ToString(), "", null);
+        }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void doTestOffsets(String in) throws Exception
+        /// <summary>
+        /// Reads <paramref name="in"/> through the filter and, for every 'X' that comes out, asserts
+        /// that <c>correctOffset</c> of its output position equals the index of the matching 'X'
+        /// in the original string.
+        /// </summary>
+        public virtual void doTestOffsets(string @in)
+        {
+            HTMLStripCharFilter reader = new HTMLStripCharFilter(new System.IO.StreamReader(new StringReader(@in)));
+            int lastX = -1; // index in the original string of the most recently matched 'X'
+            int ch;
+            for (int off = 0; (ch = reader.read()) != -1; off++)
+            {
+                // off is the position in the filtered output.
+                int correctedOff = reader.correctOffset(off);
+                if (ch != 'X')
+                {
+                    continue;
+                }
+                lastX = @in.IndexOf('X', lastX + 1);
+                assertEquals(lastX, correctedOff);
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+        /// <summary>
+        /// Runs <c>doTestOffsets</c> over a few inputs; the last one is meant to test backtracking.
+        /// </summary>
+        public virtual void testOffsets()
+        {
+            string[] inputs =
+            {
+                // "hello X how X are you" is intentionally left out, as in the original.
+                "hello <p> X<p> how <p>X are you",
+                "X & X ( X < > X",
+                // test backtracking
+                "X < &zz >X &# < X > < &l > &g < X"
+            };
+            foreach (string input in inputs)
+            {
+                doTestOffsets(input);
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static void assertLegalOffsets(String in) throws Exception
+        /// <summary>
+        /// Reads <paramref name="in"/> through the filter and asserts that the corrected offset of
+        /// every output position is no greater than the input length.
+        /// </summary>
+        internal static void assertLegalOffsets(string @in)
+        {
+            HTMLStripCharFilter reader = new HTMLStripCharFilter(new System.IO.StreamReader(new StringReader(@in)));
+            int length = @in.Length;
+            for (int off = 0; reader.read() != -1; off++)
+            {
+                int correction = reader.correctOffset(off);
+                bool withinInput = correction <= length;
+                assertTrue("invalid offset correction: " + off + "->" + correction + " for doc of length: " + length, withinInput);
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLegalOffsets() throws Exception
+        /// <summary>
+        /// Runs <c>assertLegalOffsets</c> on plain text and on text containing an incomplete
+        /// numeric entity.
+        /// </summary>
+        public virtual void testLegalOffsets()
+        {
+            foreach (string input in new string[] { "hello world", "hello &#x world" })
+            {
+                assertLegalOffsets(input);
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandom() throws Exception
+        /// <summary>
+        /// Runs <c>checkRandomData</c> on the test analyzer for <c>RANDOM_MULTIPLIER * 1000</c> rounds.
+        /// </summary>
+        public virtual void testRandom()
+        {
+            checkRandomData(random(), newTestAnalyzer(), RANDOM_MULTIPLIER * 1000);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception
+        /// <summary>
+        /// Runs <c>checkRandomData</c> on the test analyzer for <c>RANDOM_MULTIPLIER * 100</c>
+        /// rounds, passing 8192 as the final argument.
+        /// </summary>
+        public virtual void testRandomHugeStrings()
+        {
+            checkRandomData(random(), newTestAnalyzer(), RANDOM_MULTIPLIER * 100, 8192);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCloseBR() throws Exception
+        /// <summary>
+        /// Runs <c>checkAnalysisConsistency</c> on a fixed input containing a closing "&lt;/br&gt;" tag.
+        /// </summary>
+        public virtual void testCloseBR()
+        {
+            const string text = " Secretary)</br> [[M";
+            // The arguments are evaluated left to right, exactly as in a single call expression.
+            checkAnalysisConsistency(
+                random(),
+                newTestAnalyzer(),
+                random().nextBoolean(),
+                text);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testServerSideIncludes() throws Exception
+        /// <summary>
+        /// Checks server-side-include directives embedded in attribute values and inside a script
+        /// element against the expected stripped text.
+        /// </summary>
+        public virtual void testServerSideIncludes()
+        {
+            // SSI directives inside the attributes of an img tag.
+            string inAttributes = "one<img src=\"image.png\"\n"
+                + " alt = \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}' -->\"\n\n"
+                + " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
+            assertHTMLStripsTo(inAttributes, "onetwo", null);
+
+            // An SSI directive nested inside a commented-out script body.
+            string inScript = "one<script><!-- <!--#config comment=\"<!-- \\\"comment\\\"-->\"--> --></script>two";
+            assertHTMLStripsTo(inScript, "one\ntwo", null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testScriptQuotes() throws Exception
+        /// <summary>
+        /// Checks script elements whose bodies contain quoted strings with comment markers and a
+        /// quoted closing script tag.
+        /// </summary>
+        public virtual void testScriptQuotes()
+        {
+            string quotedComments = "one<script attr= bare><!-- action('<!-- comment -->', \"\\\"-->\\\"\"); --></script>two";
+            assertHTMLStripsTo(quotedComments, "one\ntwo", null);
+
+            string quotedCloseTag = "hello<script><!-- f('<!--internal--></script>'); --></script>";
+            assertHTMLStripsTo(quotedCloseTag, "hello\n", null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapeScript() throws Exception
+        /// <summary>
+        /// With "SCRIPT" as an escaped tag, expects the script tags to be kept and the script body removed.
+        /// </summary>
+        public virtual void testEscapeScript()
+        {
+            string test = "one<script no-value-attr>callSomeMethod();</script>two";
+            string gold = "one<script no-value-attr></script>two";
+            // Collection initializer instead of the Java-only Arrays.asList helper.
+            ISet<string> escapedTags = new HashSet<string> { "SCRIPT" };
+            assertHTMLStripsTo(test, gold, escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStyle() throws Exception
+        /// <summary>
+        /// Expects a style element, including its commented-out body, to be replaced by a single newline.
+        /// </summary>
+        public virtual void testStyle()
+        {
+            string styled = "one<style type=\"text/css\">\n"
+                + "<!--\n"
+                + "@import url('http://www.lasletrasdecanciones.com/css.css');\n"
+                + "-->\n"
+                + "</style>two";
+            assertHTMLStripsTo(styled, "one\ntwo", null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapeStyle() throws Exception
+        /// <summary>
+        /// With "STYLE" as an escaped tag, expects the style tags to be kept and the style body removed.
+        /// </summary>
+        public virtual void testEscapeStyle()
+        {
+            string test = "one<style type=\"text/css\"> body,font,a { font-family:arial; } </style>two";
+            string gold = "one<style type=\"text/css\"></style>two";
+            // Collection initializer instead of the Java-only Arrays.asList helper.
+            ISet<string> escapedTags = new HashSet<string> { "STYLE" };
+            assertHTMLStripsTo(test, gold, escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBR() throws Exception
+        /// <summary>
+        /// Expects each BR tag variant (self-closing, open, with attributes, closing) to become a newline.
+        /// </summary>
+        public virtual void testBR()
+        {
+            // Flat table of (input, expected) pairs.
+            string[] cases =
+            {
+                "one<BR />two<br>three", "one\ntwo\nthree",
+                "one<BR some stuff here too>two</BR>", "one\ntwo\n"
+            };
+            int cursor = 0;
+            while (cursor < cases.Length)
+            {
+                string input = cases[cursor++];
+                string expected = cases[cursor++];
+                assertHTMLStripsTo(input, expected, null);
+            }
+        }
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapeBR() throws Exception
+        /// <summary>
+        /// With "BR" as an escaped tag, expects the input (including a closing tag split across
+        /// lines) to pass through unchanged.
+        /// </summary>
+        public virtual void testEscapeBR()
+        {
+            string test = "one<BR class='whatever'>two</\nBR\n>";
+            string gold = "one<BR class='whatever'>two</\nBR\n>";
+            // Collection initializer instead of the Java-only Arrays.asList helper.
+            ISet<string> escapedTags = new HashSet<string> { "BR" };
+            assertHTMLStripsTo(test, gold, escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInlineTagsNoSpace() throws Exception
+        /// <summary>
+        /// Expects span and sup tags, in mixed case, to be removed without inserting any
+        /// replacement characters.
+        /// </summary>
+        public virtual void testInlineTagsNoSpace()
+        {
+            const string markup = "one<sPAn class=\"invisible\">two<sup>2<sup>e</sup></sup>.</SpaN>three";
+            const string expected = "onetwo2e.three";
+            assertHTMLStripsTo(markup, expected, null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCDATA() throws Exception
+        /// <summary>
+        /// Table-driven check of CDATA handling, including near-CDATA "&lt;!" constructs built from
+        /// random HTML-ish strings. The table is a flat list of (input, expected output) pairs.
+        /// </summary>
+        public virtual void testCDATA()
+        {
+            int maxNumElems = 100;
+            // Don't create a comment (disallow "<!--") and don't include a closing ">"
+            string randomHtmlishString1 = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__");
+            string closedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[&]]>";
+
+            // Don't create a comment (disallow "<!--") and don't include a closing ">"
+            string randomHtmlishString2 = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__");
+            // Built from the second random string; it was previously built from the first one,
+            // which left randomHtmlishString2 unused.
+            string unclosedAngleBangNonCDATA = "<!" + randomHtmlishString2 + "-[CDATA[";
+
+            string[] testGold = new string[]
+            {
+                "one<![CDATA[<one><two>three<four></four></two></one>]]>two", "one<one><two>three<four></four></two></one>two",
+                "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five", "onetwo<![CDATA[three]]>fourfive",
+                "<! [CDATA[&]]>", "",
+                "<! [CDATA[&] ] >", "",
+                "<! [CDATA[&]]", "<! [CDATA[&]]",
+                "<!\u2009[CDATA[&]]>", "",
+                "<!\u2009[CDATA[&]\u2009]\u2009>", "",
+                "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009",
+                closedAngleBangNonCDATA, "",
+                "<![CDATA[", "",
+                "<![CDATA[<br>", "<br>",
+                "<![CDATA[<br>]]", "<br>]]",
+                "<![CDATA[<br>]]>", "<br>",
+                "<![CDATA[<br>] ] >", "<br>] ] >",
+                "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>",
+                "<!\u2009[CDATA[", "<!\u2009[CDATA[",
+                unclosedAngleBangNonCDATA, unclosedAngleBangNonCDATA
+            };
+            // [i] is the input, [i + 1] the expected output.
+            for (int i = 0 ; i < testGold.Length ; i += 2)
+            {
+                assertHTMLStripsTo(testGold[i], testGold[i + 1], null);
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUnclosedAngleBang() throws Exception
+        /// <summary>
+        /// Expects an unterminated "&lt;![endif]" to pass through the filter unchanged.
+        /// </summary>
+        public virtual void testUnclosedAngleBang()
+        {
+            const string unclosed = "<![endif]";
+            assertHTMLStripsTo(unclosed, unclosed, null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUppercaseCharacterEntityVariants() throws Exception
+        /// <summary>
+        /// Checks that upper-case character entity names produce the characters listed in the
+        /// expected text.
+        /// </summary>
+        public virtual void testUppercaseCharacterEntityVariants()
+        {
+            // The archived literal had been entity-decoded, leaving a raw double quote inside the
+            // string, so it no longer parsed. The input below is rebuilt as upper-case entity
+            // references that decode to the gold text.
+            // NOTE(review): reconstructed from the gold string and the test name - confirm
+            // against the repository copy.
+            string test = " &QUOT;-&COPY;&GT;>&LT;<&REG;&AMP;";
+            string gold = " \"-\u00A9>><<\u00AE&";
+            assertHTMLStripsTo(test, gold, null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMSWordMalformedProcessingInstruction() throws Exception
+        /// <summary>
+        /// Expects a processing instruction that is closed with "/&gt;" rather than "?&gt;" to be
+        /// removed entirely.
+        /// </summary>
+        public virtual void testMSWordMalformedProcessingInstruction()
+        {
+            const string markup = "one<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />two";
+            const string expected = "onetwo";
+            assertHTMLStripsTo(markup, expected, null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSupplementaryCharsInTags() throws Exception
+        /// <summary>
+        /// Expects tags whose names contain supplementary (non-BMP) characters to be replaced by
+        /// newlines, while the same characters in text are kept.
+        /// </summary>
+        public virtual void testSupplementaryCharsInTags()
+        {
+            string markup = "one<𩬅艱鍟䇹愯瀛>two<瀛愯𩬅>three 瀛愯𩬅</瀛愯𩬅>four</𩬅艱鍟䇹愯瀛>five<𠀀𠀀>six<𠀀𠀀/>seven";
+            string expected = "one\ntwo\nthree 瀛愯𩬅\nfour\nfive\nsix\nseven";
+            assertHTMLStripsTo(markup, expected, null);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomBrokenHTML() throws Exception
+        /// <summary>
+        /// Generates a random HTML-ish string (10000 passed as the element limit) and runs
+        /// <c>checkAnalysisConsistency</c> on it with the test analyzer.
+        /// </summary>
+        public virtual void testRandomBrokenHTML()
+        {
+            // The text is generated first so the random calls happen in the original order.
+            string text = TestUtil.randomHtmlishString(random(), 10000);
+            checkAnalysisConsistency(
+                random(),
+                newTestAnalyzer(),
+                random().nextBoolean(),
+                text);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomText() throws Exception
+        /// <summary>
+        /// Builds a space-separated text of random words, using one of three word generators
+        /// chosen at random, and reads it through the filter to the end. There are no assertions;
+        /// the read loop simply has to complete.
+        /// </summary>
+        public virtual void testRandomText()
+        {
+            const int minNumWords = 10;
+            const int maxNumWords = 10000;
+            const int minWordLength = 3;
+            const int maxWordLength = 20;
+
+            StringBuilder text = new StringBuilder();
+            int numWords = TestUtil.Next(random(), minNumWords, maxNumWords);
+            // The generator is picked once, after the word count, and used for every word.
+            int generator = TestUtil.Next(random(), 0, 4);
+            for (int wordNum = 0; wordNum < numWords; ++wordNum)
+            {
+                if (generator == 0)
+                {
+                    text.Append(TestUtil.randomUnicodeString(random(), maxWordLength));
+                }
+                else if (generator == 1)
+                {
+                    text.Append(TestUtil.randomRealisticUnicodeString(random(), minWordLength, maxWordLength));
+                }
+                else
+                {
+                    // Every other selector value uses the simple-string generator.
+                    text.Append(TestUtil.randomSimpleString(random()));
+                }
+                text.Append(' ');
+            }
+
+            Reader reader = new HTMLStripCharFilter(new StringReader(text.ToString()));
+            while (reader.read() != -1)
+            {
+                // Drain the filter; the characters themselves are not inspected.
+            }
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUTF16Surrogates() throws Exception
+ // Runs inputs derived from surrogate code units through the test analyzer and checks the
+ // resulting tokens, grouped below as: paired surrogates, improperly paired surrogates,
+ // unpaired high surrogates and unpaired low surrogates.
+ // NOTE(review): several input literals below contain U+FFFD replacement characters; they
+ // look like they were damaged when this page was rendered - confirm against the repository.
+ public virtual void testUTF16Surrogates()
+ {
+ Analyzer analyzer = newTestAnalyzer();
+ // Paired surrogates
+ assertAnalyzesTo(analyzer, " one two ��three", new string[] {"one", "two", "\uD86C\uDC01three"});
+ assertAnalyzesTo(analyzer, " ��", new string[] {"\uD86C\uDC01"});
+ assertAnalyzesTo(analyzer, " ��", new string[] {"\uD86C\uDC01"});
+ assertAnalyzesTo(analyzer, " ��", new string[] {"\uD86C\uDC01"});
+
+ // Improperly paired surrogates
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD\uE28F"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD\uE28F"});
+ assertAnalyzesTo(analyzer, " 훚�", new string[] {"\uD6DA\uFFFD"});
+ assertAnalyzesTo(analyzer, " 훚�", new string[] {"\uD6DA\uFFFD"});
+
+ // Unpaired high surrogates
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �<br>", new string[] {"�"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �<br>", new string[] {"�"});
+
+ // Unpaired low surrogates
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �<br>", new string[] {"�"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �", new string[] {"\uFFFD"});
+ assertAnalyzesTo(analyzer, " �<br>", new string[] {"�"});
+ }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void assertHTMLStripsTo(String input, String gold, java.util.Set<String> escapedTags) throws Exception
+        /// <summary>
+        /// String convenience overload: wraps <paramref name="input"/> in a <c>StringReader</c> and
+        /// delegates to the reader-based overload.
+        /// </summary>
+        public static void assertHTMLStripsTo(string input, string gold, ISet<string> escapedTags)
+        {
+            Reader source = new StringReader(input);
+            assertHTMLStripsTo(source, gold, escapedTags);
+        }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void assertHTMLStripsTo(java.io.Reader input, String gold, java.util.Set<String> escapedTags) throws Exception
+        /// <summary>
+        /// Reads <paramref name="input"/> to the end through an <c>HTMLStripCharFilter</c> and asserts
+        /// that the collected text equals <paramref name="gold"/>.
+        /// </summary>
+        /// <param name="input">Reader supplying the text to filter.</param>
+        /// <param name="gold">Expected output of the filter.</param>
+        /// <param name="escapedTags">Tags passed to the filter as escaped tags; when null the
+        /// single-argument constructor is used.</param>
+        public static void assertHTMLStripsTo(Reader input, string gold, ISet<string> escapedTags)
+        {
+            HTMLStripCharFilter reader = (null == escapedTags)
+                ? new HTMLStripCharFilter(input)
+                : new HTMLStripCharFilter(input, escapedTags);
+
+            StringBuilder builder = new StringBuilder();
+            try
+            {
+                int ch;
+                while ((ch = reader.read()) != -1)
+                {
+                    builder.Append((char)ch);
+                }
+            }
+            catch (Exception e)
+            {
+                if (string.Equals(gold, builder.ToString(), StringComparison.Ordinal))
+                {
+                    // Output already matched: rethrow as-is. A bare "throw" keeps the original
+                    // stack trace, which "throw e" would reset.
+                    throw;
+                }
+                // Output differs: add the mismatch to the message and keep the cause as inner exception.
+                throw new Exception("('" + builder.ToString() + "' is not equal to '" + gold + "'). " + e.Message, e);
+            }
+            string actual = builder.ToString();
+            assertEquals("'" + actual + "' is not equal to '" + gold + "'", gold, actual);
+        }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs
new file mode 100644
index 0000000..08adf1b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs
@@ -0,0 +1,121 @@
+namespace org.apache.lucene.analysis.charfilter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure this factory is working
+ /// </summary>
+ public class TestHTMLStripCharFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     // Marked-up input shared by the escapedTags tests, with a character-offset ruler:
+     //
+     //             11111111112222222222333333333344
+     //   012345678901234567890123456789012345678901
+     //   <u>this</u> is <b>only</b> a <I>test</I>.
+     private const string MarkupText = "<u>this</u> is <b>only</b> a <I>test</I>.";
+
+     /// <summary>
+     /// Input without any markup keeps its tokens and offsets.
+     /// </summary>
+     public virtual void testNothingChanged()
+     {
+         //             11111111112
+         //   012345678901234567890
+         //   this is only a test.
+         const string text = "this is only a test.";
+         Reader cs = charFilterFactory("HTMLStrip", "escapedTags", "a, Title").create(new StringReader(text));
+         TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+         assertTokenStreamContents(ts,
+             new string[] {"this", "is", "only", "a", "test."},
+             new int[] {0, 5, 8, 13, 15},
+             new int[] {4, 7, 12, 14, 20});
+     }
+
+     /// <summary>
+     /// With no escapedTags argument every tag is stripped.
+     /// </summary>
+     public virtual void testNoEscapedTags()
+     {
+         Reader cs = charFilterFactory("HTMLStrip").create(new StringReader(MarkupText));
+         TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+         assertTokenStreamContents(ts,
+             new string[] {"this", "is", "only", "a", "test."},
+             new int[] {3, 12, 18, 27, 32},
+             new int[] {11, 14, 26, 28, 41});
+     }
+
+     /// <summary>
+     /// escapedTags "U i" is expected to keep the u and I tags while b is stripped.
+     /// </summary>
+     public virtual void testEscapedTags()
+     {
+         Reader cs = charFilterFactory("HTMLStrip", "escapedTags", "U i").create(new StringReader(MarkupText));
+         TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+         assertTokenStreamContents(ts,
+             new string[] {"<u>this</u>", "is", "only", "a", "<I>test</I>."},
+             new int[] {0, 12, 18, 27, 29},
+             new int[] {11, 14, 26, 28, 41});
+     }
+
+     /// <summary>
+     /// An escapedTags value made only of separators is expected to behave like no escaped tags.
+     /// </summary>
+     public virtual void testSeparatorOnlyEscapedTags()
+     {
+         Reader cs = charFilterFactory("HTMLStrip", "escapedTags", ",, , ").create(new StringReader(MarkupText));
+         TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+         assertTokenStreamContents(ts,
+             new string[] {"this", "is", "only", "a", "test."},
+             new int[] {3, 12, 18, 27, 32},
+             new int[] {11, 14, 26, 28, 41});
+     }
+
+     /// <summary>
+     /// An empty escapedTags value is expected to behave like no escaped tags.
+     /// </summary>
+     public virtual void testEmptyEscapedTags()
+     {
+         Reader cs = charFilterFactory("HTMLStrip", "escapedTags", "").create(new StringReader(MarkupText));
+         TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+         assertTokenStreamContents(ts,
+             new string[] {"this", "is", "only", "a", "test."},
+             new int[] {3, 12, 18, 27, 32},
+             new int[] {11, 14, 26, 28, 41});
+     }
+
+     /// <summary>
+     /// A single tag name surrounded by separators and whitespace is expected to keep only the b tag.
+     /// </summary>
+     public virtual void testSingleEscapedTag()
+     {
+         Reader cs = charFilterFactory("HTMLStrip", "escapedTags", ", B\r\n\t").create(new StringReader(MarkupText));
+         TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+         assertTokenStreamContents(ts,
+             new string[] {"this", "is", "<b>only</b>", "a", "test."},
+             new int[] {3, 12, 15, 27, 32},
+             new int[] {11, 14, 26, 28, 41});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in exception
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             charFilterFactory("HTMLStrip", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // System.String exposes Contains (capital C); Java's lower-case contains does not exist.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs
new file mode 100644
index 0000000..8b3d5fa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs
@@ -0,0 +1,636 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+
+ using TestUtil = org.apache.lucene.util.TestUtil;
+ using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+
+ public class TestMappingCharFilter : BaseTokenStreamTestCase
+ {
+
+ internal NormalizeCharMap normMap;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+ /// <summary>
+ /// Runs the base setup, then builds the <c>normMap</c> the tests in this class share.
+ /// </summary>
+ public override void setUp()
+ {
+     base.setUp();
+     NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+
+     // replacements shorter than the matched input
+     builder.add("aa", "a");
+     builder.add("bbb", "b");
+     builder.add("cccc", "cc");
+
+     // replacements of equal or greater length
+     builder.add("h", "i");
+     builder.add("j", "jj");
+     builder.add("k", "kkk");
+     builder.add("ll", "llll");
+
+     // replacement that removes the input entirely
+     builder.add("empty", "");
+
+     // Non-BMP code point U+1D122 (a surrogate pair in UTF-16), not a BMP character:
+     builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
+
+     builder.add("\uff01", "full-width-exclamation");
+
+     normMap = builder.build();
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReaderReset() throws Exception
+ /// <summary>
+ /// Reads the filter to exhaustion, calls reset() and checks the same content is read again.
+ /// </summary>
+ public virtual void testReaderReset()
+ {
+     char[] chars = new char[10];
+     CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));
+
+     // first pass: one character, then end of stream
+     int count = filter.read(chars, 0, 10);
+     assertEquals(1, count);
+     assertEquals('x', chars[0]);
+     count = filter.read(chars, 0, 10);
+     assertEquals(-1, count);
+
+     // rewind
+     filter.reset();
+     count = filter.read(chars, 0, 10);
+     assertEquals(1, count);
+     assertEquals('x', chars[0]);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNothingChange() throws Exception
+ /// <summary>
+ /// Input containing no mapped sequence yields the same single token.
+ /// </summary>
+ public virtual void testNothingChange()
+ {
+     string[] expectedTerms = {"x"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {1};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("x"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 1);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test1to1() throws Exception
+ /// <summary>
+ /// One input char mapped to one output char: "h" becomes "i".
+ /// </summary>
+ public virtual void test1to1()
+ {
+     string[] expectedTerms = {"i"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {1};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("h"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 1);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test1to2() throws Exception
+ /// <summary>
+ /// One input char mapped to two output chars: "j" becomes "jj".
+ /// </summary>
+ public virtual void test1to2()
+ {
+     string[] expectedTerms = {"jj"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {1};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("j"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 1);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test1to3() throws Exception
+ /// <summary>
+ /// One input char mapped to three output chars: "k" becomes "kkk".
+ /// </summary>
+ public virtual void test1to3()
+ {
+     string[] expectedTerms = {"kkk"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {1};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("k"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 1);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test2to4() throws Exception
+ /// <summary>
+ /// Two input chars mapped to four output chars: "ll" becomes "llll".
+ /// </summary>
+ public virtual void test2to4()
+ {
+     string[] expectedTerms = {"llll"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {2};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("ll"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 2);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test2to1() throws Exception
+ /// <summary>
+ /// Two input chars mapped to one output char: "aa" becomes "a".
+ /// </summary>
+ public virtual void test2to1()
+ {
+     string[] expectedTerms = {"a"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {2};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("aa"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 2);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test3to1() throws Exception
+ /// <summary>
+ /// Three input chars mapped to one output char: "bbb" becomes "b".
+ /// </summary>
+ public virtual void test3to1()
+ {
+     string[] expectedTerms = {"b"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {3};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("bbb"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 3);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test4to2() throws Exception
+ /// <summary>
+ /// Four input chars mapped to two output chars: "cccc" becomes "cc".
+ /// </summary>
+ public virtual void test4to2()
+ {
+     string[] expectedTerms = {"cc"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {4};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("cccc"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 4);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test5to0() throws Exception
+ /// <summary>
+ /// Five input chars mapped to the empty string: "empty" yields no tokens, final offset 5.
+ /// </summary>
+ public virtual void test5to0()
+ {
+     string[] noTerms = new string[0];
+     int[] noStartOffsets = new int[]{};
+     int[] noEndOffsets = new int[]{};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("empty"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, noTerms, noStartOffsets, noEndOffsets, 5);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNonBMPChar() throws Exception
+ /// <summary>
+ /// The code point 0x1D122 is mapped to "fclef"; the token spans input offsets 0 to 2.
+ /// </summary>
+ public virtual void testNonBMPChar()
+ {
+     string input = UnicodeUtil.newString(new int[] {0x1D122}, 0, 1);
+     string[] expectedTerms = {"fclef"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {2};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader(input));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 2);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFullWidthChar() throws Exception
+ /// <summary>
+ /// The single char \uff01 is mapped to "full-width-exclamation".
+ /// </summary>
+ public virtual void testFullWidthChar()
+ {
+     string[] expectedTerms = {"full-width-exclamation"};
+     int[] startOffsets = {0};
+     int[] endOffsets = {1};
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader("\uff01"));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, 1);
+ }
+
+ //
+ // 1111111111222
+ // 01234567890123456789012
+ //(in) h i j k ll cccc bbb aa
+ //
+ // 1111111111222
+ // 01234567890123456789012
+ //(out) i i jj kkk llll cc b a
+ //
+ // h, 0, 1 => i, 0, 1
+ // i, 2, 3 => i, 2, 3
+ // j, 4, 5 => jj, 4, 5
+ // k, 6, 7 => kkk, 6, 7
+ // ll, 8,10 => llll, 8,10
+ // cccc,11,15 => cc,11,15
+ // bbb,16,19 => b,16,19
+ // aa,20,22 => a,20,22
+ //
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTokenStream() throws Exception
+ /// <summary>
+ /// Runs a whitespace-separated mix of mapped sequences through the filter and checks
+ /// that every token reports offsets into the original input.
+ /// </summary>
+ public virtual void testTokenStream()
+ {
+     string input = "h i j k ll cccc bbb aa";
+     string[] expectedTerms = {"i","i","jj","kkk","llll","cc","b","a"};
+     int[] startOffsets = {0,2,4,6,8,11,16,20};
+     int[] endOffsets = {1,3,5,7,10,15,19,22};
+
+     CharFilter filtered = new MappingCharFilter(normMap, new StringReader(input));
+     TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, input.Length);
+ }
+
+ //
+ //
+ // 0123456789
+ //(in) aaaa ll h
+ //(out-1) aa llll i
+ //(out-2) a llllllll i
+ //
+ // aaaa,0,4 => a,0,4
+ // ll,5,7 => llllllll,5,7
+ // h,8,9 => i,8,9
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testChained() throws Exception
+ /// <summary>
+ /// Applies the same map twice by nesting two MappingCharFilters and checks that the
+ /// resulting offsets still refer to the original input.
+ /// </summary>
+ public virtual void testChained()
+ {
+     string input = "aaaa ll h";
+     string[] expectedTerms = {"a","llllllll","i"};
+     int[] startOffsets = {0,5,8};
+     int[] endOffsets = {4,7,9};
+
+     CharFilter firstPass = new MappingCharFilter(normMap, new StringReader(input));
+     CharFilter secondPass = new MappingCharFilter(normMap, firstPass);
+     TokenStream tokens = new MockTokenizer(secondPass, MockTokenizer.WHITESPACE, false);
+     assertTokenStreamContents(tokens, expectedTerms, startOffsets, endOffsets, input.Length);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandom() throws Exception
+ /// <summary>
+ /// Feeds random data through an analyzer whose readers are wrapped with the shared normMap.
+ /// </summary>
+ public virtual void testRandom()
+ {
+     int rounds = RANDOM_MULTIPLIER * 10000;
+     Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
+     checkRandomData(random(), mappingAnalyzer, rounds);
+ }
+
+ /// <summary>
+ /// Analyzer used by testRandom: whitespace MockTokenizer over a reader that is wrapped
+ /// in a MappingCharFilter built from the outer test's normMap.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+     private readonly TestMappingCharFilter outerInstance;
+
+     public AnalyzerAnonymousInnerClassHelper(TestMappingCharFilter outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     // The tokenizer acts as both the source and the end of the chain.
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer whitespaceTokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         return new TokenStreamComponents(whitespaceTokenizer, whitespaceTokenizer);
+     }
+
+     // Every reader is wrapped with the outer test's map before tokenization.
+     protected internal override Reader initReader(string fieldName, Reader reader)
+     {
+         NormalizeCharMap sharedMap = outerInstance.normMap;
+         return new MappingCharFilter(sharedMap, reader);
+     }
+ }
+
+ //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFinalOffsetSpecialCase() throws Exception
+ /// <summary>
+ /// Checks analysis consistency for one fixed input with a two-rule map.
+ /// </summary>
+ public virtual void testFinalOffsetSpecialCase()
+ {
+     NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
+     mappings.add("t", "");
+     // even though this below rule has no effect, the test passes if you remove it!!
+     mappings.add("tmakdbl", "c");
+     NormalizeCharMap map = mappings.build();
+
+     string text = "gzw f quaxot";
+     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);
+     checkAnalysisConsistency(random(), analyzer, false, text);
+ }
+
+ /// <summary>
+ /// Analyzer used by testFinalOffsetSpecialCase: whitespace MockTokenizer over a reader
+ /// wrapped in a MappingCharFilter built from the map passed to the constructor.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+     private readonly TestMappingCharFilter outerInstance;
+
+     // Assigned only in the constructor, so it is readonly like outerInstance.
+     private readonly NormalizeCharMap map;
+
+     public AnalyzerAnonymousInnerClassHelper2(TestMappingCharFilter outerInstance, NormalizeCharMap map)
+     {
+         this.outerInstance = outerInstance;
+         this.map = map;
+     }
+
+     // The tokenizer acts as both the source and the end of the chain.
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         return new TokenStreamComponents(tokenizer, tokenizer);
+     }
+
+     // Every reader is wrapped with the supplied map before tokenization.
+     protected internal override Reader initReader(string fieldName, Reader reader)
+     {
+         return new MappingCharFilter(map, reader);
+     }
+ }
+
+ //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomMaps() throws Exception
+ /// <summary>
+ /// Repeats the random-data check at least three times, each with a fresh map from randomMap().
+ /// </summary>
+ public virtual void testRandomMaps()
+ {
+     int remaining = atLeast(3);
+     while (remaining-- > 0)
+     {
+         NormalizeCharMap map = randomMap();
+         Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, map);
+         int rounds = 100;
+         checkRandomData(random(), analyzer, rounds);
+     }
+ }
+
+ /// <summary>
+ /// Analyzer used by testRandomMaps: whitespace MockTokenizer over a reader wrapped in a
+ /// MappingCharFilter built from the map passed to the constructor.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+     private readonly TestMappingCharFilter outerInstance;
+
+     // Assigned only in the constructor, so it is readonly like outerInstance.
+     private readonly NormalizeCharMap map;
+
+     public AnalyzerAnonymousInnerClassHelper3(TestMappingCharFilter outerInstance, NormalizeCharMap map)
+     {
+         this.outerInstance = outerInstance;
+         this.map = map;
+     }
+
+     // The tokenizer acts as both the source and the end of the chain.
+     protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+     {
+         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         return new TokenStreamComponents(tokenizer, tokenizer);
+     }
+
+     // Every reader is wrapped with the supplied map before tokenization.
+     protected internal override Reader initReader(string fieldName, Reader reader)
+     {
+         return new MappingCharFilter(map, reader);
+     }
+ }
+
+ /// <summary>
+ /// Builds a NormalizeCharMap holding up to four random mappings with distinct,
+ /// non-empty keys.
+ /// </summary>
+ /// <returns>The map produced by the builder (possibly with no mappings).</returns>
+ private NormalizeCharMap randomMap()
+ {
+     // The local must not be named "random": in C# that would hide the random()
+     // method invoked in its own initializer (CS0841).
+     Random rnd = random();
+     NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+     // we can't add duplicate keys, or NormalizeCharMap gets angry
+     ISet<string> keys = new HashSet<string>();
+     int num = rnd.Next(5);
+     for (int i = 0; i < num; i++)
+     {
+         string key = TestUtil.randomSimpleString(rnd);
+         // ISet.Add returns false when the key is already present, so one call both
+         // tests for and records the key.
+         if (key.Length != 0 && keys.Add(key))
+         {
+             string value = TestUtil.randomSimpleString(rnd);
+             builder.add(key, value);
+         }
+     }
+     return builder.build();
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomMaps2() throws Exception
+ /// <summary>
+ /// Cross-checks MappingCharFilter against a naive reference implementation.
+ /// For each iteration a random set of mappings is generated; for each of 100 random
+ /// documents the expected output and the output-to-input offset table are computed by
+ /// brute force, then compared with what the filter produces when it is consumed through
+ /// a random mix of single-char and buffered reads.
+ /// </summary>
+ public virtual void testRandomMaps2()
+ {
+     // The local must not be named "random": in C# that would hide the random()
+     // method invoked in its own initializer (CS0841).
+     Random rnd = random();
+     int numIterations = atLeast(3);
+     for (int iter = 0; iter < numIterations; iter++)
+     {
+         if (VERBOSE)
+         {
+             Console.WriteLine("\nTEST iter=" + iter);
+         }
+
+         char endLetter = (char) TestUtil.Next(rnd, 'b', 'z');
+
+         IDictionary<string, string> map = new Dictionary<string, string>();
+         NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+         int numMappings = atLeast(5);
+         if (VERBOSE)
+         {
+             Console.WriteLine(" mappings:");
+         }
+         while (map.Count < numMappings)
+         {
+             string key = TestUtil.randomSimpleStringRange(rnd, 'a', endLetter, 7);
+             if (key.Length != 0 && !map.ContainsKey(key))
+             {
+                 string value = TestUtil.randomSimpleString(rnd);
+                 map[key] = value;
+                 builder.add(key, value);
+                 if (VERBOSE)
+                 {
+                     Console.WriteLine(" " + key + " -> " + value);
+                 }
+             }
+         }
+
+         NormalizeCharMap charMap = builder.build();
+
+         if (VERBOSE)
+         {
+             Console.WriteLine(" test random documents...");
+         }
+
+         for (int iter2 = 0; iter2 < 100; iter2++)
+         {
+             string content = TestUtil.randomSimpleStringRange(rnd, 'a', endLetter, atLeast(1000));
+
+             if (VERBOSE)
+             {
+                 Console.WriteLine(" content=" + content);
+             }
+
+             // Do stupid dog-slow mapping:
+
+             // Output string:
+             StringBuilder output = new StringBuilder();
+
+             // Maps output offset to input offset:
+             IList<int?> inputOffsets = new List<int?>();
+
+             int cumDiff = 0;
+             int charIdx = 0;
+             while (charIdx < content.Length)
+             {
+                 int matchLen = -1;
+                 string matchRepl = null;
+
+                 // A dictionary enumerates directly as key/value pairs.
+                 foreach (KeyValuePair<string, string> ent in map)
+                 {
+                     string match = ent.Key;
+                     // Greedy: longer match wins
+                     if (charIdx + match.Length <= content.Length
+                         && match.Length > matchLen
+                         && string.CompareOrdinal(content, charIdx, match, 0, match.Length) == 0)
+                     {
+                         matchLen = match.Length;
+                         matchRepl = ent.Value;
+                     }
+                 }
+
+                 if (matchLen != -1)
+                 {
+                     // We found a match here!
+                     if (VERBOSE)
+                     {
+                         Console.WriteLine(" match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
+                     }
+                     output.Append(matchRepl);
+                     int minLen = Math.Min(matchLen, matchRepl.Length);
+
+                     // Common part, directly maps back to input
+                     // offset:
+                     for (int outIdx = 0; outIdx < minLen; outIdx++)
+                     {
+                         inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
+                     }
+
+                     cumDiff += matchLen - matchRepl.Length;
+                     charIdx += matchLen;
+
+                     if (matchRepl.Length > matchLen)
+                     {
+                         // Replacement string is longer than matched
+                         // input: for all the "extra" chars we map
+                         // back to a single input offset:
+                         for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
+                         {
+                             inputOffsets.Add(output.Length + cumDiff - 1);
+                         }
+                     }
+                     // A shorter or same-length replacement needs no extra offsets.
+
+                     Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
+                 }
+                 else
+                 {
+                     // No mapping applies here: copy the char through unchanged.
+                     inputOffsets.Add(output.Length + cumDiff);
+                     output.Append(content[charIdx]);
+                     charIdx++;
+                 }
+             }
+
+             string expected = output.ToString();
+             if (VERBOSE)
+             {
+                 Console.Write(" expected:");
+                 for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
+                 {
+                     Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
+                 }
+                 Console.WriteLine();
+             }
+
+             MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));
+
+             StringBuilder actualBuilder = new StringBuilder();
+             IList<int?> actualInputOffsets = new List<int?>();
+
+             // Now consume the actual mapFilter, somewhat randomly:
+             while (true)
+             {
+                 // System.Random has no nextBoolean(); an even coin flip is Next(2) == 0.
+                 if (rnd.Next(2) == 0)
+                 {
+                     int ch = mapFilter.read();
+                     if (ch == -1)
+                     {
+                         break;
+                     }
+                     actualBuilder.Append((char) ch);
+                 }
+                 else
+                 {
+                     char[] buffer = new char[TestUtil.Next(rnd, 1, 100)];
+                     int off = buffer.Length == 1 ? 0 : rnd.Next(buffer.Length - 1);
+                     int count = mapFilter.read(buffer, off, buffer.Length - off);
+                     if (count == -1)
+                     {
+                         break;
+                     }
+                     actualBuilder.Append(buffer, off, count);
+                 }
+
+                 if (rnd.Next(10) == 7)
+                 {
+                     // Map offsets
+                     while (actualInputOffsets.Count < actualBuilder.Length)
+                     {
+                         actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
+                     }
+                 }
+             }
+
+             // Finish mapping offsets
+             while (actualInputOffsets.Count < actualBuilder.Length)
+             {
+                 actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
+             }
+
+             string actual = actualBuilder.ToString();
+
+             // Verify:
+             assertEquals(expected, actual);
+             assertEquals(inputOffsets, actualInputOffsets);
+         }
+     }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs
new file mode 100644
index 0000000..078707f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs
@@ -0,0 +1,82 @@
+namespace org.apache.lucene.analysis.charfilter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ public class TestMappingCharFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Exercises MappingCharFilterFactory.parseString: malformed escapes are expected to
+     /// throw, and well-formed escapes are expected to decode to the characters they name.
+     /// </summary>
+     public virtual void testParseString()
+     {
+         MappingCharFilterFactory f = (MappingCharFilterFactory)charFilterFactory("Mapping");
+
+         try
+         {
+             f.parseString("\\");
+             fail("escape character cannot be alone.");
+         }
+         catch (System.ArgumentException)
+         {
+             // expected: a lone escape character is rejected
+         }
+
+         assertEquals("unexpected escaped characters", "\\\"\n\t\r\b\f", f.parseString("\\\\\\\"\\n\\t\\r\\b\\f"));
+         assertEquals("unexpected escaped characters", "A", f.parseString("\\u0041"));
+         assertEquals("unexpected escaped characters", "AB", f.parseString("\\u0041\\u0042"));
+
+         try
+         {
+             f.parseString("\\u000");
+             fail("invalid length check.");
+         }
+         catch (System.ArgumentException)
+         {
+             // expected: a \u escape with fewer than four digits is rejected
+         }
+
+         try
+         {
+             f.parseString("\\u123x");
+             fail("invalid hex number check.");
+         }
+         catch (System.FormatException)
+         {
+             // expected: a non-hex digit in a \u escape is rejected
+         }
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in exception
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             charFilterFactory("Mapping", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // System.String exposes Contains (capital C); Java's lower-case contains does not exist.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs
new file mode 100644
index 0000000..0b2a3b1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs
@@ -0,0 +1,289 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
+ using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+ using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+ using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+ using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ /// <summary>
+ /// Tests for <c>CJKAnalyzer</c> and <c>CJKBigramFilter</c>.
+ /// Most cases were taken over from TestCJKTokenizer.
+ /// </summary>
+ public class TestCJKAnalyzer : BaseTokenStreamTestCase
+ {
+     // Shared analyzer used by every test that does not build its own.
+     private Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
+
+     /// <summary>
+     /// A run of ten ideographs yields nine overlapping bigrams.
+     /// </summary>
+     public virtual void testJa1()
+     {
+         assertAnalyzesTo(analyzer,
+             "一二三四五六七八九十",
+             new string[] {"一二", "二三", "三四", "四五", "五六", "六七", "七八", "八九", "九十"},
+             new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8},
+             new int[] {2, 3, 4, 5, 6, 7, 8, 9, 10},
+             new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"},
+             new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// Space-separated ideograph runs: isolated characters are emitted as
+     /// SINGLE tokens, longer runs as DOUBLE bigrams.
+     /// </summary>
+     public virtual void testJa2()
+     {
+         assertAnalyzesTo(analyzer,
+             "一 二三四 五六七八九 十",
+             new string[] {"一", "二三", "三四", "五六", "六七", "七八", "八九", "十"},
+             new int[] {0, 2, 3, 6, 7, 8, 9, 12},
+             new int[] {1, 4, 5, 8, 9, 10, 11, 13},
+             new string[] {"<SINGLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<SINGLE>"},
+             new int[] {1, 1, 1, 1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// Plain Latin words come out as ordinary ALPHANUM tokens.
+     /// </summary>
+     public virtual void testC()
+     {
+         assertAnalyzesTo(analyzer,
+             "abc defgh ijklmn opqrstu vwxy z",
+             new string[] {"abc", "defgh", "ijklmn", "opqrstu", "vwxy", "z"},
+             new int[] {0, 4, 10, 17, 25, 30},
+             new int[] {3, 9, 16, 24, 29, 31},
+             new string[] {"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"},
+             new int[] {1, 1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// LUCENE-2207: wrong offset calculated by end()
+     /// </summary>
+     public virtual void testFinalOffset()
+     {
+         assertAnalyzesTo(analyzer, "あい",
+             new string[] {"あい"},
+             new int[] {0},
+             new int[] {2},
+             new string[] {"<DOUBLE>"},
+             new int[] {1});
+
+         assertAnalyzesTo(analyzer, "あい ",
+             new string[] {"あい"},
+             new int[] {0},
+             new int[] {2},
+             new string[] {"<DOUBLE>"},
+             new int[] {1});
+
+         assertAnalyzesTo(analyzer, "test",
+             new string[] {"test"},
+             new int[] {0},
+             new int[] {4},
+             new string[] {"<ALPHANUM>"},
+             new int[] {1});
+
+         assertAnalyzesTo(analyzer, "test ",
+             new string[] {"test"},
+             new int[] {0},
+             new int[] {4},
+             new string[] {"<ALPHANUM>"},
+             new int[] {1});
+
+         assertAnalyzesTo(analyzer, "あいtest",
+             new string[] {"あい", "test"},
+             new int[] {0, 2},
+             new int[] {2, 6},
+             new string[] {"<DOUBLE>", "<ALPHANUM>"},
+             new int[] {1, 1});
+
+         assertAnalyzesTo(analyzer, "testあい ",
+             new string[] {"test", "あい"},
+             new int[] {0, 4},
+             new int[] {4, 6},
+             new string[] {"<ALPHANUM>", "<DOUBLE>"},
+             new int[] {1, 1});
+     }
+
+     /// <summary>
+     /// Kana runs around a Latin word: bigrams on both sides, one ALPHANUM token in the middle.
+     /// </summary>
+     public virtual void testMix()
+     {
+         assertAnalyzesTo(analyzer,
+             "あいうえおabcかきくけこ",
+             new string[] {"あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ"},
+             new int[] {0, 1, 2, 3, 5, 8, 9, 10, 11},
+             new int[] {2, 3, 4, 5, 8, 10, 11, 12, 13},
+             new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"},
+             new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// Like <see cref="testMix"/>, but with a kana character inside the Latin
+     /// word and a trailing isolated kana, both expected as SINGLE tokens.
+     /// </summary>
+     public virtual void testMix2()
+     {
+         assertAnalyzesTo(analyzer,
+             "あいうえおabんcかきくけ こ",
+             new string[] {"あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ"},
+             new int[] {0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14},
+             new int[] {2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15},
+             new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<SINGLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<SINGLE>"},
+             new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// Non-english text (outside of CJK) is treated normally, according to unicode rules
+     /// </summary>
+     public virtual void testNonIdeographic()
+     {
+         assertAnalyzesTo(analyzer,
+             "一 روبرت موير",
+             new string[] {"一", "روبرت", "موير"},
+             new int[] {0, 2, 8},
+             new int[] {1, 7, 12},
+             new string[] {"<SINGLE>", "<ALPHANUM>", "<ALPHANUM>"},
+             new int[] {1, 1, 1});
+     }
+
+     /// <summary>
+     /// Same as <see cref="testNonIdeographic"/>, except with a nonspacing mark to show correctness.
+     /// </summary>
+     public virtual void testNonIdeographicNonLetter()
+     {
+         assertAnalyzesTo(analyzer,
+             "一 رُوبرت موير",
+             new string[] {"一", "رُوبرت", "موير"},
+             new int[] {0, 2, 9},
+             new int[] {1, 8, 13},
+             new string[] {"<SINGLE>", "<ALPHANUM>", "<ALPHANUM>"},
+             new int[] {1, 1, 1});
+     }
+
+     /// <summary>
+     /// Input beginning with a supplementary character; the first bigram is
+     /// expected to span offsets 0..3.
+     /// </summary>
+     public virtual void testSurrogates()
+     {
+         assertAnalyzesTo(analyzer,
+             "𩬅艱鍟䇹愯瀛",
+             new string[] {"𩬅艱", "艱鍟", "鍟䇹", "䇹愯", "愯瀛"},
+             new int[] {0, 2, 3, 4, 5},
+             new int[] {3, 4, 5, 6, 7},
+             new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"},
+             new int[] {1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// Runs two different inputs through the same analyzer instance back to back.
+     /// </summary>
+     public virtual void testReusableTokenStream()
+     {
+         assertAnalyzesTo(analyzer,
+             "あいうえおabcかきくけこ",
+             new string[] {"あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ"},
+             new int[] {0, 1, 2, 3, 5, 8, 9, 10, 11},
+             new int[] {2, 3, 4, 5, 8, 10, 11, 12, 13},
+             new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"},
+             new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1});
+
+         assertAnalyzesTo(analyzer,
+             "あいうえおabんcかきくけ こ",
+             new string[] {"あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ"},
+             new int[] {0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14},
+             new int[] {2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15},
+             new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<SINGLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<SINGLE>"},
+             new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+     }
+
+     /// <summary>
+     /// A single ideograph is emitted on its own as a SINGLE token.
+     /// </summary>
+     public virtual void testSingleChar()
+     {
+         assertAnalyzesTo(analyzer, "一",
+             new string[] {"一"},
+             new int[] {0},
+             new int[] {1},
+             new string[] {"<SINGLE>"},
+             new int[] {1});
+     }
+
+     /// <summary>
+     /// Three ideographs yield two overlapping bigrams.
+     /// </summary>
+     public virtual void testTokenStream()
+     {
+         assertAnalyzesTo(analyzer, "一丁丂",
+             new string[] {"一丁", "丁丂"},
+             new int[] {0, 1},
+             new int[] {2, 3},
+             new string[] {"<DOUBLE>", "<DOUBLE>"},
+             new int[] {1, 1});
+     }
+
+     /// <summary>
+     /// test that offsets are correct when mappingcharfilter is previously applied
+     /// </summary>
+     public virtual void testChangedOffsets()
+     {
+         NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+         builder.add("a", "一二");
+         builder.add("b", "二三");
+         NormalizeCharMap norm = builder.build();
+         // Local analyzer (intentionally hides the field) that applies the char map before tokenizing.
+         Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, norm);
+
+         assertAnalyzesTo(analyzer, "ab",
+             new string[] {"一二", "二二", "二三"},
+             new int[] {0, 0, 1},
+             new int[] {1, 1, 2});
+
+         // note: offsets are strange since this is how the charfilter maps them...
+         // before bigramming, the 4 tokens look like:
+         // { 0, 0, 1, 1 },
+         // { 0, 1, 1, 2 }
+     }
+
+     /// <summary>
+     /// Analyzer for <see cref="testChangedOffsets"/>: wraps the reader in a
+     /// MappingCharFilter, then runs StandardTokenizer followed by CJKBigramFilter.
+     /// </summary>
+     private class AnalyzerAnonymousInnerClassHelper : Analyzer
+     {
+         private readonly TestCJKAnalyzer outerInstance;
+
+         private readonly NormalizeCharMap norm;
+
+         public AnalyzerAnonymousInnerClassHelper(TestCJKAnalyzer outerInstance, NormalizeCharMap norm)
+         {
+             this.outerInstance = outerInstance;
+             this.norm = norm;
+         }
+
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+             return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
+         }
+
+         protected internal override Reader initReader(string fieldName, Reader reader)
+         {
+             return new MappingCharFilter(norm, reader);
+         }
+     }
+
+     /// <summary>
+     /// Token filter that stamps every token of the wrapped stream with the
+     /// StandardTokenizer IDEOGRAPHIC token type.
+     /// </summary>
+     private class FakeStandardTokenizer : TokenFilter
+     {
+         internal readonly TypeAttribute typeAtt;
+
+         public FakeStandardTokenizer(TokenStream input) : base(input)
+         {
+             // Assigned here rather than in a field initializer: C# field
+             // initializers may not call instance members such as addAttribute.
+             typeAtt = addAttribute(typeof(TypeAttribute));
+         }
+
+         public override bool incrementToken()
+         {
+             if (!input.incrementToken())
+             {
+                 return false;
+             }
+
+             typeAtt.Type = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
+             return true;
+         }
+     }
+
+     /// <summary>
+     /// A single ideograph passed through MockTokenizer, FakeStandardTokenizer,
+     /// StopFilter and CJKBigramFilter is still emitted as a SINGLE token.
+     /// </summary>
+     public virtual void testSingleChar2()
+     {
+         Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);
+
+         assertAnalyzesTo(analyzer, "一",
+             new string[] {"一"},
+             new int[] {0},
+             new int[] {1},
+             new string[] {"<SINGLE>"},
+             new int[] {1});
+     }
+
+     /// <summary>
+     /// Analyzer for <see cref="testSingleChar2"/>. Named with the suffix 3
+     /// because the unsuffixed name and the suffix 2 are already used by
+     /// other nested analyzers in this class.
+     /// </summary>
+     private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+     {
+         private readonly TestCJKAnalyzer outerInstance;
+
+         public AnalyzerAnonymousInnerClassHelper3(TestCJKAnalyzer outerInstance)
+         {
+             this.outerInstance = outerInstance;
+         }
+
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+             TokenFilter filter = new FakeStandardTokenizer(tokenizer);
+             filter = new StopFilter(TEST_VERSION_CURRENT, filter, CharArraySet.EMPTY_SET);
+             filter = new CJKBigramFilter(filter);
+             return new TokenStreamComponents(tokenizer, filter);
+         }
+     }
+
+     /// <summary>
+     /// blast some random strings through the analyzer
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(random(), new CJKAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// blast some random strings through the analyzer
+     /// (variant that passes 8192 as the additional last argument to checkRandomData)
+     /// </summary>
+     public virtual void testRandomHugeStrings()
+     {
+         // The local must not be called "random": inside its own initializer
+         // that name would hide the inherited random() method.
+         Random rnd = random();
+         checkRandomData(rnd, new CJKAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+     }
+
+     /// <summary>
+     /// An empty input through KeywordTokenizer and CJKBigramFilter yields one empty term.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+         checkOneTerm(a, "", "");
+     }
+
+     /// <summary>
+     /// Analyzer for <see cref="testEmptyTerm"/>: KeywordTokenizer followed by CJKBigramFilter.
+     /// </summary>
+     private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+     {
+         private readonly TestCJKAnalyzer outerInstance;
+
+         public AnalyzerAnonymousInnerClassHelper2(TestCJKAnalyzer outerInstance)
+         {
+             this.outerInstance = outerInstance;
+         }
+
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new KeywordTokenizer(reader);
+             return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
+         }
+     }
+ }
+
+}
\ No newline at end of file