You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@commons.apache.org by "Thomas Smets - A3 SYSTEM (JIRA)" <ji...@apache.org> on 2010/05/12 11:04:42 UTC
[jira] Created: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
-------------------------------------------------------------------------
Key: LANG-623
URL: https://issues.apache.org/jira/browse/LANG-623
Project: Commons Lang
Issue Type: Bug
Affects Versions: 2.5
Environment: W2K3-server / JDK 1.5 (from SAP)
Reporter: Thomas Smets - A3 SYSTEM
Failing test case :
<snip>
@Ignore
public void convert_English_Thorn(){
beforeConversion = "AAÞþaa";
expectedAfterConversion = "aattaa";
Assert.assertEquals("Test NOT correctly configured length not correct",
beforeConversion.length(),
expectedAfterConversion.length());
afterConversion = converter.replaceAccents(beforeConversion);
Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
}
</snip>
<snip>
/**
* This method replace some characters (typically accented characters with their no accented counterpart).
* The mapping are defined in the file loaded by {@link #loadNameConversionFile(String)} from the <code>classpath</code>.
*
* @param aField
* @return a lowercase version of the String with all the special character replaced
*
* @see #removeWeirdCharactersFromName(String)
* @see StringUtils#lowerCase(String)
* @see String#replaceAll(String, String)
*/
String replaceAccents(final String aField) {
String result = StringUtils.lowerCase(aField),
target = null,
charsToRemove = null;
char charToPut = Constants.UNDERSCORE_CHARACTER,
charToRemove = Constants.UNDERSCORE_CHARACTER;
for (Object element : conversionMappings.keySet()) {
target = element.toString();
if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
continue;
}
result = StringUtils.stripToNull(result);
charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
charToPut = target.charAt(Constants.ZERO);
for (int i=0; i<charsToRemove.length(); i++) {
charToRemove = charsToRemove.charAt(i);
result = StringUtils.replaceChars(result, charToRemove, charToPut);
}
}
return result;
}
</snip>
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Posted by "Thomas Smets - A3 SYSTEM (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/LANG-623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Thomas Smets - A3 SYSTEM updated LANG-623:
------------------------------------------
Description:
Failing test case :
<snip>
@Ignore
public void convert_English_Thorn(){
beforeConversion = "AAÞþaa";
expectedAfterConversion = "aattaa";
Assert.assertEquals("Test NOT correctly configured length not correct",
beforeConversion.length(),
expectedAfterConversion.length());
afterConversion = converter.replaceAccents(beforeConversion);
Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
}
</snip>
<snip>
String replaceAccents(final String aField) {
String result = StringUtils.lowerCase(aField),
target = null,
charsToRemove = null;
char charToPut = Constants.UNDERSCORE_CHARACTER,
charToRemove = Constants.UNDERSCORE_CHARACTER;
for (Object element : conversionMappings.keySet()) {
target = element.toString();
if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
continue;
}
result = StringUtils.stripToNull(result);
charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
charToPut = target.charAt(Constants.ZERO);
for (int i=0; i<charsToRemove.length(); i++) {
charToRemove = charsToRemove.charAt(i);
result = StringUtils.replaceChars(result, charToRemove, charToPut);
}
}
return result;
}
</snip>
was:
Failing test case :
<snip>
@Ignore
public void convert_English_Thorn(){
beforeConversion = "AAÞþaa";
expectedAfterConversion = "aattaa";
Assert.assertEquals("Test NOT correctly configured length not correct",
beforeConversion.length(),
expectedAfterConversion.length());
afterConversion = converter.replaceAccents(beforeConversion);
Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
}
</snip>
<snip>
/**
* This method replace some characters (typically accented characters with their no accented counterpart).
* The mapping are defined in the file loaded by {@link #loadNameConversionFile(String)} from the <code>classpath</code>.
*
* @param aField
* @return a lowercase version of the String with all the special character replaced
*
* @see #removeWeirdCharactersFromName(String)
* @see StringUtils#lowerCase(String)
* @see String#replaceAll(String, String)
*/
String replaceAccents(final String aField) {
String result = StringUtils.lowerCase(aField),
target = null,
charsToRemove = null;
char charToPut = Constants.UNDERSCORE_CHARACTER,
charToRemove = Constants.UNDERSCORE_CHARACTER;
for (Object element : conversionMappings.keySet()) {
target = element.toString();
if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
continue;
}
result = StringUtils.stripToNull(result);
charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
charToPut = target.charAt(Constants.ZERO);
for (int i=0; i<charsToRemove.length(); i++) {
charToRemove = charsToRemove.charAt(i);
result = StringUtils.replaceChars(result, charToRemove, charToPut);
}
}
return result;
}
</snip>
> Replace characters seems to not be able to replace Icelanding Thorn (Þ, þ)
> --------------------------------------------------------------------------
>
> Key: LANG-623
> URL: https://issues.apache.org/jira/browse/LANG-623
> Project: Commons Lang
> Issue Type: Bug
> Affects Versions: 2.5
> Environment: W2K3-server / JDK 1.5 (from SAP)
> Reporter: Thomas Smets - A3 SYSTEM
> Attachments: ConversionMappings.UTF8
>
>
> Failing test case :
> <snip>
> @Ignore
> public void convert_English_Thorn(){
> beforeConversion = "AAÞþaa";
> expectedAfterConversion = "aattaa";
> Assert.assertEquals("Test NOT correctly configured length not correct",
> beforeConversion.length(),
> expectedAfterConversion.length());
> afterConversion = converter.replaceAccents(beforeConversion);
> Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
> }
> </snip>
> <snip>
>
> String replaceAccents(final String aField) {
> String result = StringUtils.lowerCase(aField),
> target = null,
> charsToRemove = null;
> char charToPut = Constants.UNDERSCORE_CHARACTER,
> charToRemove = Constants.UNDERSCORE_CHARACTER;
> for (Object element : conversionMappings.keySet()) {
> target = element.toString();
> if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
> continue;
> }
> result = StringUtils.stripToNull(result);
> charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
> charToPut = target.charAt(Constants.ZERO);
> for (int i=0; i<charsToRemove.length(); i++) {
> charToRemove = charsToRemove.charAt(i);
> result = StringUtils.replaceChars(result, charToRemove, charToPut);
> }
> }
> return result;
> }
> </snip>
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Posted by "Joerg Schaible (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/LANG-623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Joerg Schaible updated LANG-623:
--------------------------------
Description:
Failing test case :
{code}
@Ignore
public void convert_English_Thorn(){
beforeConversion = "AAÞþaa";
expectedAfterConversion = "aattaa";
Assert.assertEquals("Test NOT correctly configured length not correct",
beforeConversion.length(),
expectedAfterConversion.length());
afterConversion = converter.replaceAccents(beforeConversion);
Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
}
{code}
{code}
String replaceAccents(final String aField) {
String result = StringUtils.lowerCase(aField),
target = null,
charsToRemove = null;
char charToPut = Constants.UNDERSCORE_CHARACTER,
charToRemove = Constants.UNDERSCORE_CHARACTER;
for (Object element : conversionMappings.keySet()) {
target = element.toString();
if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
continue;
}
result = StringUtils.stripToNull(result);
charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
charToPut = target.charAt(Constants.ZERO);
for (int i=0; i<charsToRemove.length(); i++) {
charToRemove = charsToRemove.charAt(i);
result = StringUtils.replaceChars(result, charToRemove, charToPut);
}
}
return result;
}
{code}
was:
Failing test case :
<snip>
@Ignore
public void convert_English_Thorn(){
beforeConversion = "AAÞþaa";
expectedAfterConversion = "aattaa";
Assert.assertEquals("Test NOT correctly configured length not correct",
beforeConversion.length(),
expectedAfterConversion.length());
afterConversion = converter.replaceAccents(beforeConversion);
Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
}
</snip>
<snip>
String replaceAccents(final String aField) {
String result = StringUtils.lowerCase(aField),
target = null,
charsToRemove = null;
char charToPut = Constants.UNDERSCORE_CHARACTER,
charToRemove = Constants.UNDERSCORE_CHARACTER;
for (Object element : conversionMappings.keySet()) {
target = element.toString();
if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
continue;
}
result = StringUtils.stripToNull(result);
charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
charToPut = target.charAt(Constants.ZERO);
for (int i=0; i<charsToRemove.length(); i++) {
charToRemove = charsToRemove.charAt(i);
result = StringUtils.replaceChars(result, charToRemove, charToPut);
}
}
return result;
}
</snip>
> Replace characters seems to not be able to replace Icelanding Thorn (Þ, þ)
> --------------------------------------------------------------------------
>
> Key: LANG-623
> URL: https://issues.apache.org/jira/browse/LANG-623
> Project: Commons Lang
> Issue Type: Bug
> Components: lang.*
> Affects Versions: 2.5
> Environment: W2K3-server / JDK 1.5 (from SAP)
> Reporter: Thomas Smets - A3 SYSTEM
> Attachments: ConversionMappings.UTF8
>
>
> Failing test case :
> {code}
> @Ignore
> public void convert_English_Thorn(){
> beforeConversion = "AAÞþaa";
> expectedAfterConversion = "aattaa";
> Assert.assertEquals("Test NOT correctly configured length not correct",
> beforeConversion.length(),
> expectedAfterConversion.length());
> afterConversion = converter.replaceAccents(beforeConversion);
> Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
> }
> {code}
> {code}
>
> String replaceAccents(final String aField) {
> String result = StringUtils.lowerCase(aField),
> target = null,
> charsToRemove = null;
> char charToPut = Constants.UNDERSCORE_CHARACTER,
> charToRemove = Constants.UNDERSCORE_CHARACTER;
> for (Object element : conversionMappings.keySet()) {
> target = element.toString();
> if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
> continue;
> }
> result = StringUtils.stripToNull(result);
> charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
> charToPut = target.charAt(Constants.ZERO);
> for (int i=0; i<charsToRemove.length(); i++) {
> charToRemove = charsToRemove.charAt(i);
> result = StringUtils.replaceChars(result, charToRemove, charToPut);
> }
> }
> return result;
> }
> {code}
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Closed: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Posted by "Henri Yandell (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/LANG-623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Henri Yandell closed LANG-623.
------------------------------
Resolution: Cannot Reproduce
Seems odd.
Testing, the following passes:
+ public void testLang623() {
+ assertEquals("t", StringUtils.replaceChars("\u00DE", '\u00DE', 't'));
+ assertEquals("t", StringUtils.replaceChars("\u00FE", '\u00FE', 't'));
+ }
So I think the issue is outside of StringUtils.replaceChars.
Closing the issue, but please reopen if something is wrong with my test above.
> Replace characters seems to not be able to replace Icelanding Thorn (Þ, þ)
> --------------------------------------------------------------------------
>
> Key: LANG-623
> URL: https://issues.apache.org/jira/browse/LANG-623
> Project: Commons Lang
> Issue Type: Bug
> Components: lang.*
> Affects Versions: 2.5
> Environment: W2K3-server / JDK 1.5 (from SAP)
> Reporter: Thomas Smets - A3 SYSTEM
> Attachments: ConversionMappings.UTF8
>
>
> Failing test case :
> <snip>
> @Ignore
> public void convert_English_Thorn(){
> beforeConversion = "AAÞþaa";
> expectedAfterConversion = "aattaa";
> Assert.assertEquals("Test NOT correctly configured length not correct",
> beforeConversion.length(),
> expectedAfterConversion.length());
> afterConversion = converter.replaceAccents(beforeConversion);
> Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
> }
> </snip>
> <snip>
>
> String replaceAccents(final String aField) {
> String result = StringUtils.lowerCase(aField),
> target = null,
> charsToRemove = null;
> char charToPut = Constants.UNDERSCORE_CHARACTER,
> charToRemove = Constants.UNDERSCORE_CHARACTER;
> for (Object element : conversionMappings.keySet()) {
> target = element.toString();
> if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
> continue;
> }
> result = StringUtils.stripToNull(result);
> charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
> charToPut = target.charAt(Constants.ZERO);
> for (int i=0; i<charsToRemove.length(); i++) {
> charToRemove = charsToRemove.charAt(i);
> result = StringUtils.replaceChars(result, charToRemove, charToPut);
> }
> }
> return result;
> }
> </snip>
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Posted by "Henri Yandell (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/LANG-623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Henri Yandell updated LANG-623:
-------------------------------
Component/s: lang.*
> Replace characters seems to not be able to replace Icelanding Thorn (Þ, þ)
> --------------------------------------------------------------------------
>
> Key: LANG-623
> URL: https://issues.apache.org/jira/browse/LANG-623
> Project: Commons Lang
> Issue Type: Bug
> Components: lang.*
> Affects Versions: 2.5
> Environment: W2K3-server / JDK 1.5 (from SAP)
> Reporter: Thomas Smets - A3 SYSTEM
> Attachments: ConversionMappings.UTF8
>
>
> Failing test case :
> <snip>
> @Ignore
> public void convert_English_Thorn(){
> beforeConversion = "AAÞþaa";
> expectedAfterConversion = "aattaa";
> Assert.assertEquals("Test NOT correctly configured length not correct",
> beforeConversion.length(),
> expectedAfterConversion.length());
> afterConversion = converter.replaceAccents(beforeConversion);
> Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
> }
> </snip>
> <snip>
>
> String replaceAccents(final String aField) {
> String result = StringUtils.lowerCase(aField),
> target = null,
> charsToRemove = null;
> char charToPut = Constants.UNDERSCORE_CHARACTER,
> charToRemove = Constants.UNDERSCORE_CHARACTER;
> for (Object element : conversionMappings.keySet()) {
> target = element.toString();
> if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
> continue;
> }
> result = StringUtils.stripToNull(result);
> charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
> charToPut = target.charAt(Constants.ZERO);
> for (int i=0; i<charsToRemove.length(); i++) {
> charToRemove = charsToRemove.charAt(i);
> result = StringUtils.replaceChars(result, charToRemove, charToPut);
> }
> }
> return result;
> }
> </snip>
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Commented: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Posted by "Joerg Schaible (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/LANG-623?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12869902#action_12869902 ]
Joerg Schaible commented on LANG-623:
-------------------------------------
This test can only prove something if Thomas has set the compiler's encoding option to UTF-8. Otherwise the test is simply wrong. This is why it is always a bad idea to put non-ASCII characters directly into source code.
> Replace characters seems to not be able to replace Icelanding Thorn (Þ, þ)
> --------------------------------------------------------------------------
>
> Key: LANG-623
> URL: https://issues.apache.org/jira/browse/LANG-623
> Project: Commons Lang
> Issue Type: Bug
> Components: lang.*
> Affects Versions: 2.5
> Environment: W2K3-server / JDK 1.5 (from SAP)
> Reporter: Thomas Smets - A3 SYSTEM
> Attachments: ConversionMappings.UTF8
>
>
> Failing test case :
> {code}
> @Ignore
> public void convert_English_Thorn(){
> beforeConversion = "AAÞþaa";
> expectedAfterConversion = "aattaa";
> Assert.assertEquals("Test NOT correctly configured length not correct",
> beforeConversion.length(),
> expectedAfterConversion.length());
> afterConversion = converter.replaceAccents(beforeConversion);
> Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
> }
> {code}
> {code}
>
> String replaceAccents(final String aField) {
> String result = StringUtils.lowerCase(aField),
> target = null,
> charsToRemove = null;
> char charToPut = Constants.UNDERSCORE_CHARACTER,
> charToRemove = Constants.UNDERSCORE_CHARACTER;
> for (Object element : conversionMappings.keySet()) {
> target = element.toString();
> if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
> continue;
> }
> result = StringUtils.stripToNull(result);
> charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
> charToPut = target.charAt(Constants.ZERO);
> for (int i=0; i<charsToRemove.length(); i++) {
> charToRemove = charsToRemove.charAt(i);
> result = StringUtils.replaceChars(result, charToRemove, charToPut);
> }
> }
> return result;
> }
> {code}
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (LANG-623) Replace characters seems to not be able to replace Icelandic Thorn (Þ, þ)
Posted by "Thomas Smets - A3 SYSTEM (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/LANG-623?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Thomas Smets - A3 SYSTEM updated LANG-623:
------------------------------------------
Attachment: ConversionMappings.UTF8
This file, in pure UTF-8 encoding (edited in Eclipse or jEdit), is converted into a properties file by an Ant task (native2ascii):
<echo>Converting ...</echo>
<native2ascii encoding="UTF-8"
src="${resources.dir}"
dest="${build.dir}"
includes="**/*.UTF8" ext=".properties"
description="Conversion the weird foreign characters into proper UTF-8 escaped characters" />
<echo>Conversion done !</echo>
> Replace characters seems to not be able to replace Icelanding Thorn (Þ, þ)
> --------------------------------------------------------------------------
>
> Key: LANG-623
> URL: https://issues.apache.org/jira/browse/LANG-623
> Project: Commons Lang
> Issue Type: Bug
> Affects Versions: 2.5
> Environment: W2K3-server / JDK 1.5 (from SAP)
> Reporter: Thomas Smets - A3 SYSTEM
> Attachments: ConversionMappings.UTF8
>
>
> Failing test case :
> <snip>
> @Ignore
> public void convert_English_Thorn(){
> beforeConversion = "AAÞþaa";
> expectedAfterConversion = "aattaa";
> Assert.assertEquals("Test NOT correctly configured length not correct",
> beforeConversion.length(),
> expectedAfterConversion.length());
> afterConversion = converter.replaceAccents(beforeConversion);
> Assert.assertEquals("Cannot convert (Icelandic/old english thorn) '" + beforeConversion + "'.", expectedAfterConversion, afterConversion);
> }
> </snip>
> <snip>
> /**
> * This method replace some characters (typically accented characters with their no accented counterpart).
> * The mapping are defined in the file loaded by {@link #loadNameConversionFile(String)} from the <code>classpath</code>.
> *
> * @param aField
> * @return a lowercase version of the String with all the special character replaced
> *
> * @see #removeWeirdCharactersFromName(String)
> * @see StringUtils#lowerCase(String)
> * @see String#replaceAll(String, String)
> */
> String replaceAccents(final String aField) {
> String result = StringUtils.lowerCase(aField),
> target = null,
> charsToRemove = null;
> char charToPut = Constants.UNDERSCORE_CHARACTER,
> charToRemove = Constants.UNDERSCORE_CHARACTER;
> for (Object element : conversionMappings.keySet()) {
> target = element.toString();
> if (target.equals(Constants.SPECIAL_CHARACTER_KEY)) {
> continue;
> }
> result = StringUtils.stripToNull(result);
> charsToRemove = conversionMappings.getProperty(target, Constants.EMPTY_STRING).toString();
> charToPut = target.charAt(Constants.ZERO);
> for (int i=0; i<charsToRemove.length(); i++) {
> charToRemove = charsToRemove.charAt(i);
> result = StringUtils.replaceChars(result, charToRemove, charToPut);
> }
> }
> return result;
> }
> </snip>
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.