You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jetspeed-dev@portals.apache.org by sh...@apache.org on 2005/06/15 23:22:20 UTC

svn commit: r190811 - /portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java

Author: shinsuke
Date: Wed Jun 15 14:22:20 2005
New Revision: 190811

URL: http://svn.apache.org/viewcvs?rev=190811&view=rev
Log:
fixed http://issues.apache.org/jira/browse/JS2-278

Modified:
    portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java

Modified: portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java
URL: http://svn.apache.org/viewcvs/portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java?rev=190811&r1=190810&r2=190811&view=diff
==============================================================================
--- portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java (original)
+++ portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java Wed Jun 15 14:22:20 2005
@@ -44,6 +44,7 @@
 import org.apache.jetspeed.rewriter.xml.SaxParserAdaptor;
 
 //standard java stuff
+import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.FileReader;
@@ -507,14 +508,74 @@
             HttpClient client = new HttpClient();
             GetMethod get = new GetMethod(uri);
             int status = client.executeMethod(get);
-            InputStream is = get.getResponseBodyAsStream();
-            // TODO need to parse HTML meta tag to get charset info
-            return new InputStreamReader(is, get.getResponseCharSet());
+            BufferedInputStream bis = new BufferedInputStream(get.getResponseBodyAsStream());
+            bis.mark(BLOCK_SIZE);
+            String encoding = getContentCharSet(bis);
+            if (encoding == null)
+            {
+                encoding = get.getResponseCharSet();
+            }
+            return new InputStreamReader(bis, encoding);
         }
         catch (IOException e)
         {
             throw new PortletException(e);
         }
     }
-    
+
+    /**
+     * Looks for a charset declared by an HTML content-type meta tag within the
+     * first BLOCK_SIZE bytes. The stream is reset to the caller's mark on return.
+     * @param is response body stream; the caller must have marked it beforehand
+     * @return the declared charset name, or null if none is found
+     * @throws IOException if the stream cannot be reset to its mark
+     */
+    private String getContentCharSet(InputStream is) throws IOException
+    {
+        if (!is.markSupported())
+        {
+            return null;
+        }
+        String charSet = null;
+        try
+        {
+            // read() may return short counts: fill the block, then decode only what was read.
+            byte[] buf = new byte[BLOCK_SIZE];
+            int length = 0;
+            int count;
+            while (length < BLOCK_SIZE && (count = is.read(buf, length, BLOCK_SIZE - length)) > 0)
+            {
+                length += count;
+            }
+            String content = new String(buf, 0, length, "ISO-8859-1");
+            String lowerCaseContent = content.toLowerCase();
+            int startIndex = Math.max(lowerCaseContent.indexOf("<head"), 0);
+            int headEnd = lowerCaseContent.indexOf("</head");
+            // A missing or misplaced </head means: scan to the end of the block.
+            int endIndex = (headEnd < startIndex) ? content.length() : headEnd;
+            StringTokenizer st = new StringTokenizer(content.substring(startIndex, endIndex), "<>");
+            while (charSet == null && st.hasMoreTokens())
+            {
+                String element = st.nextToken();
+                String lowerCaseElement = element.toLowerCase();
+                if (lowerCaseElement.startsWith("meta") && lowerCaseElement.indexOf("content-type") > 0)
+                {
+                    StringTokenizer est = new StringTokenizer(element, " =\"\';");
+                    while (charSet == null && est.hasMoreTokens())
+                    {
+                        if (est.nextToken().equalsIgnoreCase("charset") && est.hasMoreTokens())
+                        {
+                            charSet = est.nextToken();
+                        }
+                    }
+                }
+            }
+        }
+        catch (IOException e)
+        {
+            // Best effort: report no charset so the caller can pick a fallback.
+        }
+        is.reset();
+        return charSet;
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@portals.apache.org
For additional commands, e-mail: jetspeed-dev-help@portals.apache.org