You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by gr...@apache.org on 2005/02/04 04:15:07 UTC

svn commit: r151307 - in lenya/trunk/src/java/org/apache/lenya: lucene/ReTokenizeFile.java lucene/parser/SwingHTMLHandler.java search/crawler/HTMLHandler.java search/crawler/IterativeHTMLCrawler.java util/Log4Echo.java util/TidyCommandLine.java util/URLUtil.java

Author: gregor
Date: Thu Feb  3 19:15:03 2005
New Revision: 151307

URL: http://svn.apache.org/viewcvs?view=rev&rev=151307
Log:
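Pass an explicit locale (Locale.ENGLISH) to toUpperCase / toLowerCase calls so case conversion no longer depends on the platform default locale; also replace redundant new String() initializers with "" in HTMLHandler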

Modified:
    lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java
    lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java
    lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java
    lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
    lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java
    lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java
    lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java

Modified: lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/ReTokenizeFile.java Thu Feb  3 19:15:03 2005
@@ -27,6 +27,7 @@
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
+import java.util.Locale;
 import java.util.StringTokenizer;
 
 import org.apache.lenya.lucene.html.HTMLParser;
@@ -128,7 +129,7 @@
         int index = -1;
 
         for (int i = 0; i < words.length; i++) {
-            index = content.toLowerCase().indexOf(words[i].toLowerCase());
+            index = content.toLowerCase(Locale.ENGLISH).indexOf(words[i].toLowerCase(Locale.ENGLISH));
 
             if (index >= 0) {
                 int start = index - offset;
@@ -207,7 +208,7 @@
     public String emphasizeAsXML(String string, String[] words) {
         String emphasizedString = "... Hello <word>World</word>! ...";
 
-        String lowerCaseString = string.toLowerCase();
+        String lowerCaseString = string.toLowerCase(Locale.ENGLISH);
 
         for (int i = 0; i < words.length; i++) {
             String word = words[i].toLowerCase();
@@ -258,7 +259,7 @@
         // test if the file contains xml data and extract the encoding
         int endOfFirstTag = content.indexOf(">");
         if(endOfFirstTag > 0 && content.charAt(endOfFirstTag-1) == '?') {
-            String upperLine = content.substring(0, endOfFirstTag).toUpperCase();
+            String upperLine = content.substring(0, endOfFirstTag).toUpperCase(Locale.ENGLISH);
             int encStart = upperLine.indexOf("ENCODING=")+10;
             int encEnd = -1;
 

Modified: lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/parser/SwingHTMLHandler.java Thu Feb  3 19:15:03 2005
@@ -24,6 +24,7 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 
 import javax.swing.text.MutableAttributeSet;
 import javax.swing.text.html.HTML;
@@ -250,7 +251,7 @@
 
             if (name.equals(LUCENE_TAG_NAME)) {
                 String tagName = (String) valueObject;
-                HTML.Tag tag = HTML.getTag(tagName.toLowerCase());
+                HTML.Tag tag = HTML.getTag(tagName.toLowerCase(Locale.ENGLISH));
                 setLuceneTag(tag);
             }
 

Modified: lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/search/crawler/HTMLHandler.java Thu Feb  3 19:15:03 2005
@@ -26,6 +26,7 @@
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 
 import javax.swing.text.MutableAttributeSet;
 import javax.swing.text.html.HTML;
@@ -181,7 +182,7 @@
      * @param attribs DOCUMENT ME!
      */
     public void handleAnchor(MutableAttributeSet attribs) {
-        String href = new String();
+        String href = "";
 
         href = (String) attribs.getAttribute(HTML.Attribute.HREF);
 
@@ -232,9 +233,8 @@
      * @param attribs DOCUMENT ME!
      */
     public void handleMeta(MutableAttributeSet attribs) {
-        String name = new String();
-
-        String content = new String();
+        String name = "";
+        String content = "";
 
         name = (String) attribs.getAttribute(HTML.Attribute.NAME);
 
@@ -244,7 +244,7 @@
             return;
         }
 
-        name = name.toUpperCase();
+        name = name.toUpperCase(Locale.ENGLISH);
 
         if (name.equals("DESCRIPTION")) {
             description = content;

Modified: lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java Thu Feb  3 19:15:03 2005
@@ -24,6 +24,7 @@
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Locale;
 import java.util.StringTokenizer;
 
 import websphinx.RobotExclusion;
@@ -236,7 +237,7 @@
      */
     public URL addURL(String urlCandidate, String currentURLPath)
         throws MalformedURLException {
-        URL url = new URL(parseHREF(urlCandidate, urlCandidate.toLowerCase(), currentURLPath));
+        URL url = new URL(parseHREF(urlCandidate, urlCandidate.toLowerCase(Locale.ENGLISH), currentURLPath));
         //completeURL(currentURL,urlCandidate)  new URL(currentURLPath+"/"+urlCandidate);
 
         if (filterURL(urlCandidate, currentURLPath, urlsToCrawlLowerCase)) {
@@ -350,7 +351,7 @@
      * @return DOCUMENT ME!
      */
     public boolean filterURL(String url, String currentURLPath, java.util.TreeSet links) {
-        String urlLowCase = url.toLowerCase();
+        String urlLowCase = url.toLowerCase(Locale.ENGLISH);
 
         if (!(urlLowCase.startsWith("http://") || urlLowCase.startsWith("https://"))) {
             url = parseHREF(url, urlLowCase, currentURLPath);

Modified: lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/util/Log4Echo.java Thu Feb  3 19:15:03 2005
@@ -19,6 +19,8 @@
 
 package org.apache.lenya.util;
 
+import java.util.Locale;
+
 import org.apache.log4j.Category;
 
 
@@ -39,7 +41,7 @@
             return;
         }
 
-        String level = args[0].toLowerCase();
+        String level = args[0].toLowerCase(Locale.ENGLISH);
         String message = args[1];
         if (level.equals("debug")) {
             log.debug(message);

Modified: lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/util/TidyCommandLine.java Thu Feb  3 19:15:03 2005
@@ -47,7 +47,7 @@
         }
 
         try {
-            new TidyCommandLine().tidy(new URL(args[0]), new File(args[1]), new File(args[2]), true);
+        	new TidyCommandLine().tidy(new URL(args[0]), new File(args[1]), new File(args[2]), true);
         } catch (Exception e) {
             System.err.println(e);
         }

Modified: lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java?view=diff&r1=151306&r2=151307
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/util/URLUtil.java Thu Feb  3 19:15:03 2005
@@ -19,6 +19,8 @@
 
 package org.apache.lenya.util;
 
+import java.util.Locale;
+
 
 /**
  * DOCUMENT ME!
@@ -46,7 +48,7 @@
      */
     public static String complete(String parent, String child) {
         String url = child;
-        String urlLowCase = child.toLowerCase();
+        String urlLowCase = child.toLowerCase(Locale.ENGLISH);
         String currentURLPath = parent.substring(0, parent.lastIndexOf("/"));
         String rootURL = parent.substring(0, parent.indexOf("/", 8));
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org