You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2009/11/18 18:51:28 UTC

svn commit: r881850 - /incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs

Author: digy
Date: Wed Nov 18 17:51:28 2009
New Revision: 881850

URL: http://svn.apache.org/viewvc?rev=881850&view=rev
Log:
LUCENENET-281 TestCJK on TestQueryParser fails

Modified:
    incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs

Modified: incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/QueryParser/TestQueryParser.cs?rev=881850&r1=881849&r2=881850&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs Wed Nov 18 17:51:28 2009
@@ -294,13 +294,29 @@
 		}
 		
 		[Test]
-		public virtual void  TestCJK()
-		{
-			// Test Ideographic Space - As wide as a CJK character cell (fullwidth)
-			// used google to translate the word "term" to japanese -> 用語
-			AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
-			AssertQueryEquals("用語\u3000用語\u3000用語", null, "用語\u0020用語\u0020用語");
-		}
+        public virtual void TestCJK()
+        {
+            // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
+            // used google to translate the word "term" to japanese -> 用語
+            //
+            // NOTE: What is printed above is not the translation of "term" into
+            // Japanese.  Google translate currently gives:
+            //
+            // 期間
+            //
+            // Which translates to unicode characters 26399 and 38291, or
+            // the literals '\u671f' and '\u9593'.
+            //
+            // Unlike the second and third characters in the previous string ('\u201d' and '\u00a8')
+            // which fail the test for IsCharacter when tokenized by LetterTokenizer (as it should
+            // in Java), which causes the word to be split differently than if it actually used
+            // letters as defined by Unicode.
+            //
+            // Using the string "\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593" with just the two
+            // characters is enough, as it uses two characters with the full width of a CJK character cell.
+            AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
+            AssertQueryEquals("\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593", null, "\u671f\u9593\u0020\u671f\u9593\u0020\u671f\u9593");
+        }
 		
 		[Test]
 		public virtual void  TestSimple()



RE: svn commit: r881850 - /incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs

Posted by George Aroush <ge...@aroush.net>.
Hi Everyone,

Just a small nit-pick here.

Patches should fix issues, and nothing more.  There is no need to bring in additional comments or code that don't exist in the Java Lucene world _unless_ the comment or code explains a behavior difference between the two languages, or environments, that is _not_ obvious.

It's really important to keep this discipline as it will help us with the next port.

Thanks.

-- George

-----Original Message-----
From: digy@apache.org [mailto:digy@apache.org] 
Sent: Wednesday, November 18, 2009 12:51 PM
To: lucene-net-commits@incubator.apache.org
Subject: svn commit: r881850 - /incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs

Author: digy
Date: Wed Nov 18 17:51:28 2009
New Revision: 881850

URL: http://svn.apache.org/viewvc?rev=881850&view=rev
Log:
LUCENENET-281 TestCJK on TestQueryParser fails

Modified:
    incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs

Modified: incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/QueryParser/TestQueryParser.cs?rev=881850&r1=881849&r2=881850&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs Wed Nov 18 17:51:28 2009
@@ -294,13 +294,29 @@
 		}
 		
 		[Test]
-		public virtual void  TestCJK()
-		{
-			// Test Ideographic Space - As wide as a CJK character cell (fullwidth)
-			// used google to translate the word "term" to japanese -> 用語
-			AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
-			AssertQueryEquals("用語\u3000用語\u3000用語", null, "用語\u0020用語\u0020用語");
-		}
+        public virtual void TestCJK()
+        {
+            // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
+            // used google to translate the word "term" to japanese -> 用語
+            //
+            // NOTE: What is printed above is not the translation of "term" into
+            // Japanese.  Google translate currently gives:
+            //
+            // 期間
+            //
+            // Which translates to unicode characters 26399 and 38291, or
+            // the literals '\u671f' and '\u9593'.
+            //
+            // Unlike the second and third characters in the previous string ('\u201d' and '\u00a8')
+            // which fail the test for IsCharacter when tokenized by LetterTokenizer (as it should
+            // in Java), which causes the word to be split differently than if it actually used
+            // letters as defined by Unicode.
+            //
+            // Using the string "\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593" with just the two
+            // characters is enough, as it uses two characters with the full width of a CJK character cell.
+            AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
+            AssertQueryEquals("\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593", null, "\u671f\u9593\u0020\u671f\u9593\u0020\u671f\u9593");
+        }
 		
 		[Test]
 		public virtual void  TestSimple()