You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2009/11/18 18:51:28 UTC
svn commit: r881850 -
/incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
Author: digy
Date: Wed Nov 18 17:51:28 2009
New Revision: 881850
URL: http://svn.apache.org/viewvc?rev=881850&view=rev
Log:
LUCENENET-281 TestCJK on TestQueryParser fails
Modified:
incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
Modified: incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/QueryParser/TestQueryParser.cs?rev=881850&r1=881849&r2=881850&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs Wed Nov 18 17:51:28 2009
@@ -294,13 +294,29 @@
}
[Test]
- public virtual void TestCJK()
- {
- // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
- // used google to translate the word "term" to japanese -> ç”¨èªž
- AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
- AssertQueryEquals("ç”¨èªž\u3000ç”¨èªž\u3000ç”¨èªž", null, "ç”¨èªž\u0020ç”¨èªž\u0020ç”¨èªž");
- }
+ public virtual void TestCJK()
+ {
+ // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
+ // used google to translate the word "term" to japanese -> ç”¨èªž
+ //
+ // NOTE: What is printed above is not the translation of "term" into
+ // Japanese. Google translate currently gives:
+ //
+ // 期間
+ //
+ // Which translates to unicode characters 26399 and 38291, or
+ // the literals '\u671f' and '\u9593'.
+ //
+ // Unlike the second and third characters in the previous string ('\u201d' and '\u00a8')
+ // which fail the test for IsCharacter when tokenized by LetterTokenizer (as it should
+ // in Java), which causes the word to be split differently than if it actually used
+ // letters as defined by Unicode.
+ //
+ // Using the string "\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593" with just the two
+ // characters is enough, as it uses two characters with the full width of a CJK character cell.
+ AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
+ AssertQueryEquals("\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593", null, "\u671f\u9593\u0020\u671f\u9593\u0020\u671f\u9593");
+ }
[Test]
public virtual void TestSimple()
RE: svn commit: r881850 - /incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
Posted by George Aroush <ge...@aroush.net>.
Hi Everyone,
Just a small nit-pick here.
Patches should fix issues, and nothing more. There is no need to bring in additional comments or code that don't exist in the Java Lucene world _unless_ the comment or code explains a behavior difference between the two languages or environments that is _not_ obvious.
It's really important to keep this discipline as it will help us with the next port.
Thanks.
-- George
-----Original Message-----
From: digy@apache.org [mailto:digy@apache.org]
Sent: Wednesday, November 18, 2009 12:51 PM
To: lucene-net-commits@incubator.apache.org
Subject: svn commit: r881850 - /incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
Author: digy
Date: Wed Nov 18 17:51:28 2009
New Revision: 881850
URL: http://svn.apache.org/viewvc?rev=881850&view=rev
Log:
LUCENENET-281 TestCJK on TestQueryParser fails
Modified:
incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
Modified: incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Test/QueryParser/TestQueryParser.cs?rev=881850&r1=881849&r2=881850&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Test/QueryParser/TestQueryParser.cs Wed Nov 18 17:51:28 2009
@@ -294,13 +294,29 @@
}
[Test]
- public virtual void TestCJK()
- {
- // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
- // used google to translate the word "term" to japanese -> ç”¨èªž
- AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
- AssertQueryEquals("ç”¨èªž\u3000ç”¨èªž\u3000ç”¨èªž", null, "ç”¨èªž\u0020ç”¨èªž\u0020ç”¨èªž");
- }
+ public virtual void TestCJK()
+ {
+ // Test Ideographic Space - As wide as a CJK character cell (fullwidth)
+ // used google to translate the word "term" to japanese -> ç”¨èªž
+ //
+ // NOTE: What is printed above is not the translation of "term" into
+ // Japanese. Google translate currently gives:
+ //
+ // 期間
+ //
+ // Which translates to unicode characters 26399 and 38291, or
+ // the literals '\u671f' and '\u9593'.
+ //
+ // Unlike the second and third characters in the previous string ('\u201d' and '\u00a8')
+ // which fail the test for IsCharacter when tokenized by LetterTokenizer (as it should
+ // in Java), which causes the word to be split differently than if it actually used
+ // letters as defined by Unicode.
+ //
+ // Using the string "\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593" with just the two
+ // characters is enough, as it uses two characters with the full width of a CJK character cell.
+ AssertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term");
+ AssertQueryEquals("\u671f\u9593\u3000\u671f\u9593\u3000\u671f\u9593", null, "\u671f\u9593\u0020\u671f\u9593\u0020\u671f\u9593");
+ }
[Test]
public virtual void TestSimple()