You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jetspeed-dev@portals.apache.org by sh...@apache.org on 2005/06/15 23:22:20 UTC
svn commit: r190811 -
/portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java
Author: shinsuke
Date: Wed Jun 15 14:22:20 2005
New Revision: 190811
URL: http://svn.apache.org/viewcvs?rev=190811&view=rev
Log:
fixed http://issues.apache.org/jira/browse/JS2-278
Modified:
portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java
Modified: portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java
URL: http://svn.apache.org/viewcvs/portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java?rev=190811&r1=190810&r2=190811&view=diff
==============================================================================
--- portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java (original)
+++ portals/jetspeed-2/trunk/components/web-content/src/java/org/apache/jetspeed/portlet/WebContentPortlet.java Wed Jun 15 14:22:20 2005
@@ -44,6 +44,7 @@
import org.apache.jetspeed.rewriter.xml.SaxParserAdaptor;
//standard java stuff
+import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileReader;
@@ -507,14 +508,74 @@
HttpClient client = new HttpClient();
GetMethod get = new GetMethod(uri);
int status = client.executeMethod(get);
- InputStream is = get.getResponseBodyAsStream();
- // TODO need to parse HTML meta tag to get charset info
- return new InputStreamReader(is, get.getResponseCharSet());
+ BufferedInputStream bis = new BufferedInputStream(get.getResponseBodyAsStream());
+ bis.mark(BLOCK_SIZE);
+ String encoding = getContentCharSet(bis);
+ if (encoding == null)
+ {
+ encoding = get.getResponseCharSet();
+ }
+ return new InputStreamReader(bis, encoding);
}
catch (IOException e)
{
throw new PortletException(e);
}
}
-
+
+    /**
+     * Looks for a charset declared in an HTML meta "content-type" element
+     * near the start of the response body.
+     * <p>
+     * At most BLOCK_SIZE bytes are read from the stream and decoded as
+     * ISO-8859-1 (one byte per char), so the ASCII markup can be scanned
+     * before the real encoding is known. The stream is always reset to its
+     * mark afterwards; the caller must therefore have called mark() with a
+     * read limit of at least BLOCK_SIZE before invoking this method.
+     *
+     * @param is the response body; must support mark/reset and be marked
+     * @return the declared charset name, or null if the stream does not
+     *         support mark/reset, the block could not be read, or no
+     *         declaration was found in the scanned block
+     * @throws IOException if the stream cannot be reset to its mark
+     */
+    private String getContentCharSet(InputStream is) throws IOException
+    {
+        if (!is.markSupported())
+        {
+            return null;
+        }
+
+        String charset = null;
+        try
+        {
+            byte[] buf = new byte[BLOCK_SIZE];
+            // A single read() may deliver fewer bytes than requested, so keep
+            // reading until the block is full or the stream ends.
+            int length = 0;
+            while (length < BLOCK_SIZE)
+            {
+                int count = is.read(buf, length, BLOCK_SIZE - length);
+                if (count < 0)
+                {
+                    break;
+                }
+                length += count;
+            }
+
+            // Decode only the bytes actually read; decoding the whole buffer
+            // would append NUL padding to the text being parsed.
+            String content = new String(buf, 0, length, "ISO-8859-1");
+            // Fixed locale: the lower-cased copy is only used to find indices
+            // into content, so it must keep the same length and must not
+            // depend on the platform default locale.
+            String lowerCaseContent = content.toLowerCase(java.util.Locale.ENGLISH);
+            int startIndex = lowerCaseContent.indexOf("<head");
+            if (startIndex == -1)
+            {
+                startIndex = 0;
+            }
+            // Search from startIndex so that endIndex can never precede it.
+            int endIndex = lowerCaseContent.indexOf("</head", startIndex);
+            if (endIndex == -1)
+            {
+                endIndex = content.length();
+            }
+            content = content.substring(startIndex, endIndex);
+
+            // Split the head section into elements, then split each matching
+            // meta element into attribute tokens and take the token that
+            // follows "charset".
+            StringTokenizer st = new StringTokenizer(content, "<>");
+            while (charset == null && st.hasMoreTokens())
+            {
+                String element = st.nextToken();
+                String lowerCaseElement = element.toLowerCase(java.util.Locale.ENGLISH);
+                if (lowerCaseElement.startsWith("meta") && lowerCaseElement.indexOf("content-type") > 0)
+                {
+                    StringTokenizer est = new StringTokenizer(element, " =\"\';");
+                    while (est.hasMoreTokens())
+                    {
+                        if (est.nextToken().equalsIgnoreCase("charset") && est.hasMoreTokens())
+                        {
+                            charset = est.nextToken();
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+        catch (IOException ignored)
+        {
+            // Detection is best-effort: on a read failure report "no charset
+            // found" and leave the choice of a fallback to the caller.
+        }
+        finally
+        {
+            // Rewind on every exit path so the caller sees the body from its
+            // marked position.
+            is.reset();
+        }
+
+        return charset;
+    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@portals.apache.org
For additional commands, e-mail: jetspeed-dev-help@portals.apache.org