You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by gr...@apache.org on 2005/02/04 04:15:07 UTC
svn commit: r151307 - in lenya/trunk/src/java/org/apache/lenya:
lucene/ReTokenizeFile.java lucene/parser/SwingHTMLHandler.java
search/crawler/HTMLHandler.java search/crawler/IterativeHTMLCrawler.java
util/Log4Echo.java util/TidyCommandLine.java util/URLUtil.java
Author: gregor
Date: Thu Feb 3 19:15:03 2005
New Revision: 151307
URL: http://svn.apache.org/viewcvs?view=rev&rev=151307
Log:
Fix dubious code: pass an explicit locale (Locale.ENGLISH) to the toUpperCase / toLowerCase calls instead of relying on the default locale
Modified:
lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java
lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java
lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java
lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java
lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java
lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java Thu Feb 3 19:15:03 2005
@@ -27,6 +27,7 @@
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
+import java.util.Locale;
import java.util.StringTokenizer;
import org.apache.lenya.lucene.html.HTMLParser;
@@ -128,7 +129,7 @@
int index = -1;
for (int i = 0; i < words.length; i++) {
- index = content.toLowerCase().indexOf(words[i].toLowerCase());
+ index = content.toLowerCase(Locale.ENGLISH).indexOf(words[i].toLowerCase(Locale.ENGLISH));
if (index >= 0) {
int start = index - offset;
@@ -207,7 +208,7 @@
public String emphasizeAsXML(String string, String[] words) {
String emphasizedString = "... Hello <word>World</word>! ...";
- String lowerCaseString = string.toLowerCase();
+ String lowerCaseString = string.toLowerCase(Locale.ENGLISH);
for (int i = 0; i < words.length; i++) {
String word = words[i].toLowerCase();
@@ -258,7 +259,7 @@
// test if the file contains xml data and extract the encoding
int endOfFirstTag = content.indexOf(">");
if(endOfFirstTag > 0 && content.charAt(endOfFirstTag-1) == '?') {
- String upperLine = content.substring(0, endOfFirstTag).toUpperCase();
+ String upperLine = content.substring(0, endOfFirstTag).toUpperCase(Locale.ENGLISH);
int encStart = upperLine.indexOf("ENCODING=")+10;
int encEnd = -1;
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java Thu Feb 3 19:15:03 2005
@@ -24,6 +24,7 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
@@ -250,7 +251,7 @@
if (name.equals(LUCENE_TAG_NAME)) {
String tagName = (String) valueObject;
- HTML.Tag tag = HTML.getTag(tagName.toLowerCase());
+ HTML.Tag tag = HTML.getTag(tagName.toLowerCase(Locale.ENGLISH));
setLuceneTag(tag);
}
Modified: lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java Thu Feb 3 19:15:03 2005
@@ -26,6 +26,7 @@
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
@@ -181,7 +182,7 @@
* @param attribs DOCUMENT ME!
*/
public void handleAnchor(MutableAttributeSet attribs) {
- String href = new String();
+ String href = "";
href = (String) attribs.getAttribute(HTML.Attribute.HREF);
@@ -232,9 +233,8 @@
* @param attribs DOCUMENT ME!
*/
public void handleMeta(MutableAttributeSet attribs) {
- String name = new String();
-
- String content = new String();
+ String name = "";
+ String content = "";
name = (String) attribs.getAttribute(HTML.Attribute.NAME);
@@ -244,7 +244,7 @@
return;
}
- name = name.toUpperCase();
+ name = name.toUpperCase(Locale.ENGLISH);
if (name.equals("DESCRIPTION")) {
description = content;
Modified: lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java Thu Feb 3 19:15:03 2005
@@ -24,6 +24,7 @@
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Locale;
import java.util.StringTokenizer;
import websphinx.RobotExclusion;
@@ -236,7 +237,7 @@
*/
public URL addURL(String urlCandidate, String currentURLPath)
throws MalformedURLException {
- URL url = new URL(parseHREF(urlCandidate, urlCandidate.toLowerCase(), currentURLPath));
+ URL url = new URL(parseHREF(urlCandidate, urlCandidate.toLowerCase(Locale.ENGLISH), currentURLPath));
//completeURL(currentURL,urlCandidate) new URL(currentURLPath+"/"+urlCandidate);
if (filterURL(urlCandidate, currentURLPath, urlsToCrawlLowerCase)) {
@@ -350,7 +351,7 @@
* @return DOCUMENT ME!
*/
public boolean filterURL(String url, String currentURLPath, java.util.TreeSet links) {
- String urlLowCase = url.toLowerCase();
+ String urlLowCase = url.toLowerCase(Locale.ENGLISH);
if (!(urlLowCase.startsWith("http://") || urlLowCase.startsWith("https://"))) {
url = parseHREF(url, urlLowCase, currentURLPath);
Modified: lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java Thu Feb 3 19:15:03 2005
@@ -19,6 +19,8 @@
package org.apache.lenya.util;
+import java.util.Locale;
+
import org.apache.log4j.Category;
@@ -39,7 +41,7 @@
return;
}
- String level = args[0].toLowerCase();
+ String level = args[0].toLowerCase(Locale.ENGLISH);
String message = args[1];
if (level.equals("debug")) {
log.debug(message);
Modified: lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java Thu Feb 3 19:15:03 2005
@@ -47,7 +47,7 @@
}
try {
- new TidyCommandLine().tidy(new URL(args[0]), new File(args[1]), new File(args[2]), true);
+ new TidyCommandLine().tidy(new URL(args[0]), new File(args[1]), new File(args[2]), true);
} catch (Exception e) {
System.err.println(e);
}
Modified: lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java Thu Feb 3 19:15:03 2005
@@ -19,6 +19,8 @@
package org.apache.lenya.util;
+import java.util.Locale;
+
/**
* DOCUMENT ME!
@@ -46,7 +48,7 @@
*/
public static String complete(String parent, String child) {
String url = child;
- String urlLowCase = child.toLowerCase();
+ String urlLowCase = child.toLowerCase(Locale.ENGLISH);
String currentURLPath = parent.substring(0, parent.lastIndexOf("/"));
String rootURL = parent.substring(0, parent.indexOf("/", 8));
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org