You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jetspeed-dev@portals.apache.org by ta...@apache.org on 2004/06/02 17:18:18 UTC
cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets WebClippingPortlet.java
taylor 2004/06/02 08:18:18
Modified: src/java/org/apache/jetspeed/portal/portlets
WebClippingPortlet.java
Log:
WebPageClipping portlet cannot handle encoding correctly - patch applied
http://nagoya.apache.org/jira/browse/JS1-481
patch from Shinsuke Sugaya
CVS: ----------------------------------------------------------------------
CVS: PR:
CVS: If this change addresses a PR in the problem report tracking
CVS: database, then enter the PR number(s) here.
CVS: Obtained from:
CVS: If this change has been taken from another system, such as NCSA,
CVS: then name the system in this line, otherwise delete it.
CVS: Submitted by:
CVS: If this code has been contributed to Apache by someone else; i.e.,
CVS: they sent us a patch or a new module, then include their name/email
CVS: address here. If this is your work then delete this line.
CVS: Reviewed by:
CVS: If we are doing pre-commit code reviews and someone else has
CVS: reviewed your changes, include their name(s) here.
CVS: If you have not had it reviewed then delete this line.
Revision Changes Path
1.3 +391 -375 jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets/WebClippingPortlet.java
Index: WebClippingPortlet.java
===================================================================
RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets/WebClippingPortlet.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- WebClippingPortlet.java 23 Feb 2004 04:03:34 -0000 1.2
+++ WebClippingPortlet.java 2 Jun 2004 15:18:18 -0000 1.3
@@ -25,6 +25,7 @@
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
+import java.util.StringTokenizer;
import org.apache.ecs.ConcreteElement;
import org.apache.jetspeed.portal.PortletConfig;
@@ -48,379 +49,394 @@
public class WebClippingPortlet extends AbstractInstancePortlet
{
- /**
- * Static initialization of the logger for this class
- */
- private static final JetspeedLogger logger =
- JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
-
- // Define parameter name for the first tag to clip
- public static final String START = "startTag";
- // Define parameter name for the last tag to clip
- public static final String STOP = "stopTag";
- // Define parameter name for a single tag to clip
- public static final String TAG = "Tag";
- // Define parameter name for the number of the tag to clip
- public static final String TAGNUM = "startTagNumber";
- // Define parameter name for the URL of the page
- public static final String URL = "url";
- // Error message for startTag without stopTag
- private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
- // Error message for wrong startTagNumber parameter
- private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
-
- protected boolean initDone = false;
- protected boolean contentStale = true;
- protected boolean cacheContent = false;
- protected String username = null;
- protected String password = null;
-
- private Hashtable patterns = null;
-
- /**
- * Initialize this portlet
- * @throws PortletException Initialization failed
- */
- public void init()
- {
- if (initDone)
- return;
-
- patterns = new Hashtable();
-
- try
- {
- loadParams();
- }
- catch (Exception e)
- {
- logger.info("Exception occurred:" + e.toString());
- e.printStackTrace();
- }
-
- contentStale = true;
- initDone = true;
- }
-
- /**
- * took this from FileServerPortlet as it was private
- *
- */
-
- // FIXME: Currently only the expiration the HTTP Response header is honored.
- // Expiration information in <meta> tags are not honored
-
- protected Reader getReader(String url) throws IOException
- {
- URL pageUrl = new URL(url);
-
- URLConnection pageConn = pageUrl.openConnection();
- try
- {
- // set HTTP Basic Authetication header if username and password are set
- if (username != null && password != null)
- {
- pageConn.setRequestProperty(
- "Authorization",
- "Basic "
- + Base64.encodeAsString(username + ":" + password));
- }
-
- }
- catch (Exception e)
- {
- logger.info("Exception occurred:" + e.toString());
- e.printStackTrace();
- }
-
- long pageExpiration = pageConn.getExpiration();
- String encoding = pageConn.getContentEncoding();
- String tempString = null;
- String noCache = "no-cache";
-
- if (encoding == null)
- {
- // Standard HTTP encoding
- encoding = "iso-8859-1";
- }
-
- /*
- * Determing if content should be cached.
- */
- cacheContent = true; // Assume content is cached
- if (pageExpiration == 0)
- {
- cacheContent = false;
- }
- // Check header field CacheControl
- tempString = pageConn.getHeaderField("Cache-Control");
- if (tempString != null)
- {
- if (tempString.toLowerCase().indexOf(noCache) >= 0)
- {
- cacheContent = false;
- }
- }
- // Check header field Pragma
- tempString = pageConn.getHeaderField("Pragma");
- if (tempString != null)
- {
- if (tempString.toLowerCase().indexOf(noCache) >= 0)
- {
- cacheContent = false;
- }
- }
-
- // Assign a reader
- Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
-
- // Only set the page expiration it the page has not expired
- if (pageExpiration > System.currentTimeMillis()
- && (cacheContent == true))
- {
- contentStale = false;
- logger.debug(
- "WebPagePortlet caching URL: "
- + url
- + " Expiration: "
- + pageExpiration
- + ", "
- + (pageExpiration - System.currentTimeMillis())
- + " milliseconds into the future");
- setExpirationMillis(pageExpiration);
- }
- else
- {
- contentStale = true;
- }
-
- return rdr;
- }
-
- /**
- This methods outputs the content of the portlet for a given
- request.
-
- @param data the RunData object for the request
- @return the content to be displayed to the user-agent
- */
- public ConcreteElement getContent(RunData data)
- {
- PortletConfig config = this.getPortletConfig();
-
- if (contentStale == true)
- return getWebClippedContent(data, config);
-
- if (null == getExpirationMillis())
- return getContent(data, null, true);
-
- if (getExpirationMillis().longValue() <= System.currentTimeMillis())
- return getWebClippedContent(data, config);
-
- return getContent(data, null, true);
- }
-
- /*
- * This method returns the clipped part of the Web page
- */
- private ConcreteElement getWebClippedContent(
- RunData data,
- PortletConfig config)
- {
- String clippedString = ""; // HTML to visualize
- JetspeedClearElement element = null;
- int patternNumber = 1;
- int tagNumber = 0;
- Reader htmlReader;
- String defaultUrl = selectUrl(data, config);
-
- try
- {
- // Re-load parameters to see immediately the effect of changes
- loadParams();
- Enumeration en = patterns.keys();
-
- while (en.hasMoreElements())
- {
- String name = (String) en.nextElement();
-
- // Search for parameters in the right order
- if (name.equals(START + String.valueOf(patternNumber))
- || name.equals(TAG + String.valueOf(patternNumber)))
- {
- String start =
- (String) patterns.get(
- START + String.valueOf(patternNumber));
- String simpleTag =
- (String) patterns.get(
- TAG + String.valueOf(patternNumber));
- String stop =
- (String) patterns.get(
- STOP + String.valueOf(patternNumber));
- String tagNum =
- (String) patterns.get(
- TAGNUM + String.valueOf(patternNumber));
- // A group of params can have a specific url
- String url =
- (String) patterns.get(
- URL + String.valueOf(patternNumber));
- url = controlUrl(url, defaultUrl);
- htmlReader = getReader(url);
-
- if ((start != null) && (stop == null))
- {
- element = new JetspeedClearElement(BAD_PARAM);
- return element;
- }
-
- if (tagNum != null)
- {
- try
- {
- tagNumber = Integer.parseInt(tagNum);
- }
- catch (NumberFormatException e)
- {
- logger.info("Exception occurred:" + e.toString());
- e.printStackTrace();
- element = new JetspeedClearElement(BAD_NUMBER);
- return element;
- }
- }
-
- if ((simpleTag != null) && (tagNum == null))
- clippedString =
- clippedString
- + Transformer.findElement(
- htmlReader,
- url,
- simpleTag);
- else if ((simpleTag != null) && (tagNum != null))
- clippedString =
- clippedString
- + Transformer.findElementNumber(
- htmlReader,
- url,
- simpleTag,
- tagNumber);
- else if (tagNum == null)
- clippedString =
- clippedString
- + Transformer.clipElements(
- htmlReader,
- url,
- start,
- stop);
- else if (tagNum != null)
- clippedString =
- clippedString
- + Transformer.clipElementsNumber(
- htmlReader,
- url,
- start,
- stop,
- tagNumber);
-
- patternNumber = patternNumber + 1;
- //Restart Enumeration, because params could not be in the right order
- en = patterns.keys();
- htmlReader.close();
- }
- }
-
- element = new JetspeedClearElement(clippedString);
-
- //FIXME: We should do a clearContent() for the media type, not ALL media types
- this.clearContent();
- // doing this because setContent() is not overwriting current content.
- this.setContent(element);
-
- }
- catch (Exception e)
- {
- logger.info("Exception occurred:" + e.toString());
- e.printStackTrace();
- }
-
- return element;
- }
-
- /**
- * Usually called by caching system when portlet is marked as expired, but
- * has not be idle longer then TimeToLive.
- *
- * Any cached content that is expired need to be refreshed.
- */
- public void refresh()
- {
- if (cacheContent == true)
- {
- getWebClippedContent(null, this.getPortletConfig());
- }
- }
-
- /**
- * Select the URL to use for this portlet.
- * @return The URL to use for this portlet
- */
- protected String selectUrl(RunData data, PortletConfig config)
- {
- String url = config.getURL();
- return url;
- }
-
- /*
- * Choose between a specific url and the default url
- */
- private String controlUrl(String url, String defaultUrl)
- {
- if (url == null)
- {
- return defaultUrl;
- }
-
- //if the given URL doesn not include a protocol... ie http:// or ftp://
- //then resolve it relative to the current URL context
- if (url.indexOf("://") < 0)
- {
- url = TurbineServlet.getResource(url).toString();
- }
-
- return url;
- }
-
- /*
- * Load portlet parameters
- */
- private void loadParams() throws PortletException
- {
- Iterator en = this.getPortletConfig().getInitParameterNames();
-
- try
- {
- while (en.hasNext())
- {
- String name = (String) en.next();
-
- if (name.equals("username"))
- username =
- this.getPortletConfig().getInitParameter("username");
- else if (name.equals("password"))
- password =
- this.getPortletConfig().getInitParameter("password");
- else
- patterns.put(
- name,
- this.getPortletConfig().getInitParameter(name));
-
- }
- }
- catch (Exception e)
- {
- logger.info("Exception occurred:" + e.toString());
- e.printStackTrace();
- throw new PortletException(e.toString());
- }
- }
+ /**
+ * Static initialization of the logger for this class
+ */
+ private static final JetspeedLogger logger =
+ JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
+
+ // Define parameter name for the first tag to clip
+ public static final String START = "startTag";
+ // Define parameter name for the last tag to clip
+ public static final String STOP = "stopTag";
+ // Define parameter name for a single tag to clip
+ public static final String TAG = "Tag";
+ // Define parameter name for the number of the tag to clip
+ public static final String TAGNUM = "startTagNumber";
+ // Define parameter name for the URL of the page
+ public static final String URL = "url";
+ // Error message for startTag without stopTag
+ private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
+ // Error message for wrong startTagNumber parameter
+ private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
+
+ protected boolean initDone = false;
+ protected boolean contentStale = true;
+ protected boolean cacheContent = false;
+ protected String username = null;
+ protected String password = null;
+
+ private Hashtable patterns = null;
+
+ /**
+ * Initialize this portlet
+ * @throws PortletException Initialization failed
+ */
+ public void init()
+ {
+ if (initDone)
+ return;
+
+ patterns = new Hashtable();
+
+ try
+ {
+ loadParams();
+ }
+ catch (Exception e)
+ {
+ logger.info("Exception occurred:" + e.toString());
+ e.printStackTrace();
+ }
+
+ contentStale = true;
+ initDone = true;
+ }
+
+ /**
+ * took this from FileServerPortlet as it was private
+ *
+ */
+
+ // FIXME: Currently only the expiration in the HTTP Response header is honored.
+ // Expiration information in <meta> tags is not honored
+
+ protected Reader getReader(String url) throws IOException
+ {
+ URL pageUrl = new URL(url);
+
+ URLConnection pageConn = pageUrl.openConnection();
+ try
+ {
+ // set HTTP Basic Authentication header if username and password are set
+ if (username != null && password != null)
+ {
+ pageConn.setRequestProperty(
+ "Authorization",
+ "Basic "
+ + Base64.encodeAsString(username + ":" + password));
+ }
+
+ }
+ catch (Exception e)
+ {
+ logger.info("Exception occurred:" + e.toString());
+ e.printStackTrace();
+ }
+
+ long pageExpiration = pageConn.getExpiration();
+ String encoding = "iso-8859-1";
+ String contentType = pageConn.getContentType();
+ String tempString = null;
+ String noCache = "no-cache";
+
+ if (contentType != null)
+ {
+ StringTokenizer st = new StringTokenizer(contentType, "; =");
+ while (st.hasMoreTokens())
+ {
+ if (st.nextToken().equalsIgnoreCase("charset"))
+ {
+ try
+ {
+ encoding = st.nextToken();
+ break;
+ }
+ catch (Exception e)
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Determining if content should be cached.
+ */
+ cacheContent = true; // Assume content is cached
+ if (pageExpiration == 0)
+ {
+ cacheContent = false;
+ }
+ // Check header field CacheControl
+ tempString = pageConn.getHeaderField("Cache-Control");
+ if (tempString != null)
+ {
+ if (tempString.toLowerCase().indexOf(noCache) >= 0)
+ {
+ cacheContent = false;
+ }
+ }
+ // Check header field Pragma
+ tempString = pageConn.getHeaderField("Pragma");
+ if (tempString != null)
+ {
+ if (tempString.toLowerCase().indexOf(noCache) >= 0)
+ {
+ cacheContent = false;
+ }
+ }
+
+ // Assign a reader
+ Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
+
+ // Only set the page expiration if the page has not expired
+ if (pageExpiration > System.currentTimeMillis()
+ && (cacheContent == true))
+ {
+ contentStale = false;
+ logger.debug(
+ "WebPagePortlet caching URL: "
+ + url
+ + " Expiration: "
+ + pageExpiration
+ + ", "
+ + (pageExpiration - System.currentTimeMillis())
+ + " milliseconds into the future");
+ setExpirationMillis(pageExpiration);
+ }
+ else
+ {
+ contentStale = true;
+ }
+
+ return rdr;
+ }
+
+ /**
+ This method outputs the content of the portlet for a given
+ request.
+
+ @param data the RunData object for the request
+ @return the content to be displayed to the user-agent
+ */
+ public ConcreteElement getContent(RunData data)
+ {
+ PortletConfig config = this.getPortletConfig();
+
+ if (contentStale == true)
+ return getWebClippedContent(data, config);
+
+ if (null == getExpirationMillis())
+ return getContent(data, null, true);
+
+ if (getExpirationMillis().longValue() <= System.currentTimeMillis())
+ return getWebClippedContent(data, config);
+
+ return getContent(data, null, true);
+ }
+
+ /*
+ * This method returns the clipped part of the Web page
+ */
+ private ConcreteElement getWebClippedContent(
+ RunData data,
+ PortletConfig config)
+ {
+ String clippedString = ""; // HTML to visualize
+ JetspeedClearElement element = null;
+ int patternNumber = 1;
+ int tagNumber = 0;
+ Reader htmlReader;
+ String defaultUrl = selectUrl(data, config);
+
+ try
+ {
+ // Re-load parameters to see immediately the effect of changes
+ loadParams();
+ Enumeration en = patterns.keys();
+
+ while (en.hasMoreElements())
+ {
+ String name = (String) en.nextElement();
+
+ // Search for parameters in the right order
+ if (name.equals(START + String.valueOf(patternNumber))
+ || name.equals(TAG + String.valueOf(patternNumber)))
+ {
+ String start =
+ (String) patterns.get(
+ START + String.valueOf(patternNumber));
+ String simpleTag =
+ (String) patterns.get(
+ TAG + String.valueOf(patternNumber));
+ String stop =
+ (String) patterns.get(
+ STOP + String.valueOf(patternNumber));
+ String tagNum =
+ (String) patterns.get(
+ TAGNUM + String.valueOf(patternNumber));
+ // A group of params can have a specific url
+ String url =
+ (String) patterns.get(
+ URL + String.valueOf(patternNumber));
+ url = controlUrl(url, defaultUrl);
+ htmlReader = getReader(url);
+
+ if ((start != null) && (stop == null))
+ {
+ element = new JetspeedClearElement(BAD_PARAM);
+ return element;
+ }
+
+ if (tagNum != null)
+ {
+ try
+ {
+ tagNumber = Integer.parseInt(tagNum);
+ }
+ catch (NumberFormatException e)
+ {
+ logger.info("Exception occurred:" + e.toString());
+ e.printStackTrace();
+ element = new JetspeedClearElement(BAD_NUMBER);
+ return element;
+ }
+ }
+
+ if ((simpleTag != null) && (tagNum == null))
+ clippedString =
+ clippedString
+ + Transformer.findElement(
+ htmlReader,
+ url,
+ simpleTag);
+ else if ((simpleTag != null) && (tagNum != null))
+ clippedString =
+ clippedString
+ + Transformer.findElementNumber(
+ htmlReader,
+ url,
+ simpleTag,
+ tagNumber);
+ else if (tagNum == null)
+ clippedString =
+ clippedString
+ + Transformer.clipElements(
+ htmlReader,
+ url,
+ start,
+ stop);
+ else if (tagNum != null)
+ clippedString =
+ clippedString
+ + Transformer.clipElementsNumber(
+ htmlReader,
+ url,
+ start,
+ stop,
+ tagNumber);
+
+ patternNumber = patternNumber + 1;
+ //Restart Enumeration, because params might not be in the right order
+ en = patterns.keys();
+ htmlReader.close();
+ }
+ }
+
+ element = new JetspeedClearElement(clippedString);
+
+ //FIXME: We should do a clearContent() for the media type, not ALL media types
+ this.clearContent();
+ // doing this because setContent() is not overwriting current content.
+ this.setContent(element);
+
+ }
+ catch (Exception e)
+ {
+ logger.info("Exception occurred:" + e.toString());
+ e.printStackTrace();
+ }
+
+ return element;
+ }
+
+ /**
+ * Usually called by caching system when portlet is marked as expired, but
+ * has not been idle longer than TimeToLive.
+ *
+ * Any cached content that is expired needs to be refreshed.
+ */
+ public void refresh()
+ {
+ if (cacheContent == true)
+ {
+ getWebClippedContent(null, this.getPortletConfig());
+ }
+ }
+
+ /**
+ * Select the URL to use for this portlet.
+ * @return The URL to use for this portlet
+ */
+ protected String selectUrl(RunData data, PortletConfig config)
+ {
+ String url = config.getURL();
+ return url;
+ }
+
+ /*
+ * Choose between a specific url and the default url
+ */
+ private String controlUrl(String url, String defaultUrl)
+ {
+ if (url == null)
+ {
+ return defaultUrl;
+ }
+
+ //if the given URL does not include a protocol... ie http:// or ftp://
+ //then resolve it relative to the current URL context
+ if (url.indexOf("://") < 0)
+ {
+ url = TurbineServlet.getResource(url).toString();
+ }
+
+ return url;
+ }
+
+ /*
+ * Load portlet parameters
+ */
+ private void loadParams() throws PortletException
+ {
+ Iterator en = this.getPortletConfig().getInitParameterNames();
+
+ try
+ {
+ while (en.hasNext())
+ {
+ String name = (String) en.next();
+
+ if (name.equals("username"))
+ username =
+ this.getPortletConfig().getInitParameter("username");
+ else if (name.equals("password"))
+ password =
+ this.getPortletConfig().getInitParameter("password");
+ else
+ patterns.put(
+ name,
+ this.getPortletConfig().getInitParameter(name));
+
+ }
+ }
+ catch (Exception e)
+ {
+ logger.info("Exception occurred:" + e.toString());
+ e.printStackTrace();
+ throw new PortletException(e.toString());
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org