You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jetspeed-dev@portals.apache.org by ta...@apache.org on 2004/06/02 17:18:18 UTC

cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets WebClippingPortlet.java

taylor      2004/06/02 08:18:18

  Modified:    src/java/org/apache/jetspeed/portal/portlets
                        WebClippingPortlet.java
  Log:
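  WebClippingPortlet could not handle page character encodings correctly; it now reads the charset from the HTTP Content-Type header instead of Content-Encoding - patch applied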
  
  http://nagoya.apache.org/jira/browse/JS1-481
  
  patch from Shinsuke Sugaya
  
  CVS: ----------------------------------------------------------------------
  CVS: PR:
  CVS:   If this change addresses a PR in the problem report tracking
  CVS:   database, then enter the PR number(s) here.
  CVS: Obtained from:
  CVS:   If this change has been taken from another system, such as NCSA,
  CVS:   then name the system in this line, otherwise delete it.
  CVS: Submitted by:
  CVS:   If this code has been contributed to Apache by someone else; i.e.,
  CVS:   they sent us a patch or a new module, then include their name/email
  CVS:   address here. If this is your work then delete this line.
  CVS: Reviewed by:
  CVS:   If we are doing pre-commit code reviews and someone else has
  CVS:   reviewed your changes, include their name(s) here.
  CVS:   If you have not had it reviewed then delete this line.
  
  Revision  Changes    Path
  1.3       +391 -375  jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets/WebClippingPortlet.java
  
  Index: WebClippingPortlet.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets/WebClippingPortlet.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- WebClippingPortlet.java	23 Feb 2004 04:03:34 -0000	1.2
  +++ WebClippingPortlet.java	2 Jun 2004 15:18:18 -0000	1.3
  @@ -25,6 +25,7 @@
   import java.util.Enumeration;
   import java.util.Hashtable;
   import java.util.Iterator;
  +import java.util.StringTokenizer;
   
   import org.apache.ecs.ConcreteElement;
   import org.apache.jetspeed.portal.PortletConfig;
  @@ -48,379 +49,394 @@
   public class WebClippingPortlet extends AbstractInstancePortlet
   {
   
  -	/**
  -	 * Static initialization of the logger for this class
  -	 */
  -	private static final JetspeedLogger logger =
  -		JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
  -
  -	// Define parameter name for the first tag to clip
  -	public static final String START = "startTag";
  -	// Define parameter name for the last tag to clip
  -	public static final String STOP = "stopTag";
  -	// Define parameter name for a single tag to clip
  -	public static final String TAG = "Tag";
  -	// Define parameter name for the number of the tag to clip
  -	public static final String TAGNUM = "startTagNumber";
  -	// Define parameter name for the URL of the page
  -	public static final String URL = "url";
  -	// Error message for startTag without stopTag
  -	private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
  -	// Error message for wrong startTagNumber parameter
  -	private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
  -
  -	protected boolean initDone = false;
  -	protected boolean contentStale = true;
  -	protected boolean cacheContent = false;
  -	protected String username = null;
  -	protected String password = null;
  -
  -	private Hashtable patterns = null;
  -
  -	/**
  -	 * Initialize this portlet
  -	 * @throws PortletException Initialization failed
  -	 */
  -	public void init()
  -	{
  -		if (initDone)
  -			return;
  -
  -		patterns = new Hashtable();
  -
  -		try
  -		{
  -			loadParams();
  -		}
  -		catch (Exception e)
  -		{
  -			logger.info("Exception occurred:" + e.toString());
  -			e.printStackTrace();
  -		}
  -
  -		contentStale = true;
  -		initDone = true;
  -	}
  -
  -	/**
  -	 * took this from FileServerPortlet as it was private 
  -	 *
  -	*/
  -
  -	// FIXME: Currently only the expiration the HTTP Response header is honored. 
  -	//        Expiration information in <meta> tags are not honored 
  -
  -	protected Reader getReader(String url) throws IOException
  -	{
  -		URL pageUrl = new URL(url);
  -
  -		URLConnection pageConn = pageUrl.openConnection();
  -		try
  -		{
  -			// set HTTP Basic Authetication header if username and password are set
  -			if (username != null && password != null)
  -			{
  -				pageConn.setRequestProperty(
  -					"Authorization",
  -					"Basic "
  -						+ Base64.encodeAsString(username + ":" + password));
  -			}
  -
  -		}
  -		catch (Exception e)
  -		{
  -			logger.info("Exception occurred:" + e.toString());
  -			e.printStackTrace();
  -		}
  -
  -		long pageExpiration = pageConn.getExpiration();
  -		String encoding = pageConn.getContentEncoding();
  -		String tempString = null;
  -		String noCache = "no-cache";
  -
  -		if (encoding == null)
  -		{
  -			// Standard HTTP encoding
  -			encoding = "iso-8859-1";
  -		}
  -
  -		/*
  -		 * Determing if content should be cached.
  -		 */
  -		cacheContent = true; // Assume content is cached
  -		if (pageExpiration == 0)
  -		{
  -			cacheContent = false;
  -		}
  -		// Check header field CacheControl
  -		tempString = pageConn.getHeaderField("Cache-Control");
  -		if (tempString != null)
  -		{
  -			if (tempString.toLowerCase().indexOf(noCache) >= 0)
  -			{
  -				cacheContent = false;
  -			}
  -		}
  -		// Check header field Pragma
  -		tempString = pageConn.getHeaderField("Pragma");
  -		if (tempString != null)
  -		{
  -			if (tempString.toLowerCase().indexOf(noCache) >= 0)
  -			{
  -				cacheContent = false;
  -			}
  -		}
  -
  -		// Assign a reader
  -		Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
  -
  -		// Only set the page expiration it the page has not expired
  -		if (pageExpiration > System.currentTimeMillis()
  -			&& (cacheContent == true))
  -		{
  -			contentStale = false;
  -			logger.debug(
  -				"WebPagePortlet caching URL: "
  -					+ url
  -					+ " Expiration: "
  -					+ pageExpiration
  -					+ ", "
  -					+ (pageExpiration - System.currentTimeMillis())
  -					+ " milliseconds into the future");
  -			setExpirationMillis(pageExpiration);
  -		}
  -		else
  -		{
  -			contentStale = true;
  -		}
  -
  -		return rdr;
  -	}
  -
  -	/**
  -	This methods outputs the content of the portlet for a given 
  -	request.
  -	
  -	@param data the RunData object for the request
  -	@return the content to be displayed to the user-agent
  -	*/
  -	public ConcreteElement getContent(RunData data)
  -	{
  -		PortletConfig config = this.getPortletConfig();
  -
  -		if (contentStale == true)
  -			return getWebClippedContent(data, config);
  -
  -		if (null == getExpirationMillis())
  -			return getContent(data, null, true);
  -
  -		if (getExpirationMillis().longValue() <= System.currentTimeMillis())
  -			return getWebClippedContent(data, config);
  -
  -		return getContent(data, null, true);
  -	}
  -
  -	/*
  -	 * This method returns the clipped part of the Web page
  -	 */
  -	private ConcreteElement getWebClippedContent(
  -		RunData data,
  -		PortletConfig config)
  -	{
  -		String clippedString = ""; // HTML to visualize
  -		JetspeedClearElement element = null;
  -		int patternNumber = 1;
  -		int tagNumber = 0;
  -		Reader htmlReader;
  -		String defaultUrl = selectUrl(data, config);
  -
  -		try
  -		{
  -			// Re-load parameters to see immediately the effect of changes
  -			loadParams();
  -			Enumeration en = patterns.keys();
  -
  -			while (en.hasMoreElements())
  -			{
  -				String name = (String) en.nextElement();
  -
  -				// Search for parameters in the right order
  -				if (name.equals(START + String.valueOf(patternNumber))
  -					|| name.equals(TAG + String.valueOf(patternNumber)))
  -				{
  -					String start =
  -						(String) patterns.get(
  -							START + String.valueOf(patternNumber));
  -					String simpleTag =
  -						(String) patterns.get(
  -							TAG + String.valueOf(patternNumber));
  -					String stop =
  -						(String) patterns.get(
  -							STOP + String.valueOf(patternNumber));
  -					String tagNum =
  -						(String) patterns.get(
  -							TAGNUM + String.valueOf(patternNumber));
  -					// A group of params can have a specific url
  -					String url =
  -						(String) patterns.get(
  -							URL + String.valueOf(patternNumber));
  -					url = controlUrl(url, defaultUrl);
  -					htmlReader = getReader(url);
  -
  -					if ((start != null) && (stop == null))
  -					{
  -						element = new JetspeedClearElement(BAD_PARAM);
  -						return element;
  -					}
  -
  -					if (tagNum != null)
  -					{
  -						try
  -						{
  -							tagNumber = Integer.parseInt(tagNum);
  -						}
  -						catch (NumberFormatException e)
  -						{
  -							logger.info("Exception occurred:" + e.toString());
  -							e.printStackTrace();
  -							element = new JetspeedClearElement(BAD_NUMBER);
  -							return element;
  -						}
  -					}
  -
  -					if ((simpleTag != null) && (tagNum == null))
  -						clippedString =
  -							clippedString
  -								+ Transformer.findElement(
  -									htmlReader,
  -									url,
  -									simpleTag);
  -					else if ((simpleTag != null) && (tagNum != null))
  -						clippedString =
  -							clippedString
  -								+ Transformer.findElementNumber(
  -									htmlReader,
  -									url,
  -									simpleTag,
  -									tagNumber);
  -					else if (tagNum == null)
  -						clippedString =
  -							clippedString
  -								+ Transformer.clipElements(
  -									htmlReader,
  -									url,
  -									start,
  -									stop);
  -					else if (tagNum != null)
  -						clippedString =
  -							clippedString
  -								+ Transformer.clipElementsNumber(
  -									htmlReader,
  -									url,
  -									start,
  -									stop,
  -									tagNumber);
  -
  -					patternNumber = patternNumber + 1;
  -					//Restart Enumeration, because params could not be in the right order
  -					en = patterns.keys();
  -					htmlReader.close();
  -				}
  -			}
  -
  -			element = new JetspeedClearElement(clippedString);
  -
  -			//FIXME: We should do a clearContent() for the media type, not ALL media types
  -			this.clearContent();
  -			// doing this because setContent() is not overwriting current content.
  -			this.setContent(element);
  -
  -		}
  -		catch (Exception e)
  -		{
  -			logger.info("Exception occurred:" + e.toString());
  -			e.printStackTrace();
  -		}
  -
  -		return element;
  -	}
  -
  -	/**
  -	 * Usually called by caching system when portlet is marked as expired, but
  -	 * has not be idle longer then TimeToLive.
  -	 *
  -	 * Any cached content that is expired need to be refreshed.
  -	 */
  -	public void refresh()
  -	{
  -		if (cacheContent == true)
  -		{
  -			getWebClippedContent(null, this.getPortletConfig());
  -		}
  -	}
  -
  -	/**
  -	 * Select the URL to use for this portlet.
  -	 * @return The URL to use for this portlet
  -	 */
  -	protected String selectUrl(RunData data, PortletConfig config)
  -	{
  -		String url = config.getURL();
  -		return url;
  -	}
  -
  -	/*
  -	 * Choose between a specific url and the default url
  -	 */
  -	private String controlUrl(String url, String defaultUrl)
  -	{
  -		if (url == null)
  -		{
  -			return defaultUrl;
  -		}
  -
  -		//if the given URL doesn not include a protocol... ie http:// or ftp://
  -		//then resolve it relative to the current URL context
  -		if (url.indexOf("://") < 0)
  -		{
  -			url = TurbineServlet.getResource(url).toString();
  -		}
  -
  -		return url;
  -	}
  -
  -	/*
  -	 * Load portlet parameters
  -	 */
  -	private void loadParams() throws PortletException
  -	{
  -		Iterator en = this.getPortletConfig().getInitParameterNames();
  -
  -		try
  -		{
  -			while (en.hasNext())
  -			{
  -				String name = (String) en.next();
  -
  -				if (name.equals("username"))
  -					username =
  -						this.getPortletConfig().getInitParameter("username");
  -				else if (name.equals("password"))
  -					password =
  -						this.getPortletConfig().getInitParameter("password");
  -				else
  -					patterns.put(
  -						name,
  -						this.getPortletConfig().getInitParameter(name));
  -
  -			}
  -		}
  -		catch (Exception e)
  -		{
  -			logger.info("Exception occurred:" + e.toString());
  -			e.printStackTrace();
  -			throw new PortletException(e.toString());
  -		}
  -	}
  +    /**
  +     * Static initialization of the logger for this class
  +     */
  +    private static final JetspeedLogger logger =
  +        JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
  +
  +    // Define parameter name for the first tag to clip
  +    public static final String START = "startTag";
  +    // Define parameter name for the last tag to clip
  +    public static final String STOP = "stopTag";
  +    // Define parameter name for a single tag to clip
  +    public static final String TAG = "Tag";
  +    // Define parameter name for the number of the tag to clip
  +    public static final String TAGNUM = "startTagNumber";
  +    // Define parameter name for the URL of the page
  +    public static final String URL = "url";
  +    // Error message for startTag without stopTag
  +    private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
  +    // Error message for wrong startTagNumber parameter
  +    private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
  +
  +    protected boolean initDone = false;
  +    protected boolean contentStale = true;
  +    protected boolean cacheContent = false;
  +    protected String username = null;
  +    protected String password = null;
  +
  +    private Hashtable patterns = null;
  +
  +    /**
  +     * Initialize this portlet
  +     * @throws PortletException Initialization failed
  +     */
  +    public void init()
  +    {
  +        if (initDone)
  +            return;
  +
  +        patterns = new Hashtable();
  +
  +        try
  +        {
  +            loadParams();
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +        }
  +
  +        contentStale = true;
  +        initDone = true;
  +    }
  +
  +    /**
  +     * took this from FileServerPortlet as it was private 
  +     *
  +    */
  +
  +    // FIXME: Currently only the expiration the HTTP Response header is honored. 
  +    //        Expiration information in <meta> tags are not honored 
  +
  +    protected Reader getReader(String url) throws IOException
  +    {
  +        URL pageUrl = new URL(url);
  +
  +        URLConnection pageConn = pageUrl.openConnection();
  +        try
  +        {
  +            // set HTTP Basic Authetication header if username and password are set
  +            if (username != null && password != null)
  +            {
  +                pageConn.setRequestProperty(
  +                    "Authorization",
  +                    "Basic "
  +                        + Base64.encodeAsString(username + ":" + password));
  +            }
  +
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +        }
  +
  +        long pageExpiration = pageConn.getExpiration();
  +        String encoding = "iso-8859-1";
  +        String contentType = pageConn.getContentType();
  +        String tempString = null;
  +        String noCache = "no-cache";
  +
  +        if (contentType != null)
  +        {
  +            StringTokenizer st = new StringTokenizer(contentType, "; =");
  +            while (st.hasMoreTokens())
  +            {
  +                if (st.nextToken().equalsIgnoreCase("charset"))
  +                {
  +                    try
  +                    {
  +                        encoding = st.nextToken();
  +                        break;
  +                    }
  +                    catch (Exception e)
  +                    {
  +                        break;
  +                    }
  +                }
  +            }
  +        }
  +
  +        /*
  +         * Determing if content should be cached.
  +         */
  +        cacheContent = true; // Assume content is cached
  +        if (pageExpiration == 0)
  +        {
  +            cacheContent = false;
  +        }
  +        // Check header field CacheControl
  +        tempString = pageConn.getHeaderField("Cache-Control");
  +        if (tempString != null)
  +        {
  +            if (tempString.toLowerCase().indexOf(noCache) >= 0)
  +            {
  +                cacheContent = false;
  +            }
  +        }
  +        // Check header field Pragma
  +        tempString = pageConn.getHeaderField("Pragma");
  +        if (tempString != null)
  +        {
  +            if (tempString.toLowerCase().indexOf(noCache) >= 0)
  +            {
  +                cacheContent = false;
  +            }
  +        }
  +
  +        // Assign a reader
  +        Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
  +
  +        // Only set the page expiration it the page has not expired
  +        if (pageExpiration > System.currentTimeMillis()
  +            && (cacheContent == true))
  +        {
  +            contentStale = false;
  +            logger.debug(
  +                "WebPagePortlet caching URL: "
  +                    + url
  +                    + " Expiration: "
  +                    + pageExpiration
  +                    + ", "
  +                    + (pageExpiration - System.currentTimeMillis())
  +                    + " milliseconds into the future");
  +            setExpirationMillis(pageExpiration);
  +        }
  +        else
  +        {
  +            contentStale = true;
  +        }
  +
  +        return rdr;
  +    }
  +
  +    /**
  +    This methods outputs the content of the portlet for a given 
  +    request.
  +    
  +    @param data the RunData object for the request
  +    @return the content to be displayed to the user-agent
  +    */
  +    public ConcreteElement getContent(RunData data)
  +    {
  +        PortletConfig config = this.getPortletConfig();
  +
  +        if (contentStale == true)
  +            return getWebClippedContent(data, config);
  +
  +        if (null == getExpirationMillis())
  +            return getContent(data, null, true);
  +
  +        if (getExpirationMillis().longValue() <= System.currentTimeMillis())
  +            return getWebClippedContent(data, config);
  +
  +        return getContent(data, null, true);
  +    }
  +
  +    /*
  +     * This method returns the clipped part of the Web page
  +     */
  +    private ConcreteElement getWebClippedContent(
  +        RunData data,
  +        PortletConfig config)
  +    {
  +        String clippedString = ""; // HTML to visualize
  +        JetspeedClearElement element = null;
  +        int patternNumber = 1;
  +        int tagNumber = 0;
  +        Reader htmlReader;
  +        String defaultUrl = selectUrl(data, config);
  +
  +        try
  +        {
  +            // Re-load parameters to see immediately the effect of changes
  +            loadParams();
  +            Enumeration en = patterns.keys();
  +
  +            while (en.hasMoreElements())
  +            {
  +                String name = (String) en.nextElement();
  +
  +                // Search for parameters in the right order
  +                if (name.equals(START + String.valueOf(patternNumber))
  +                    || name.equals(TAG + String.valueOf(patternNumber)))
  +                {
  +                    String start =
  +                        (String) patterns.get(
  +                            START + String.valueOf(patternNumber));
  +                    String simpleTag =
  +                        (String) patterns.get(
  +                            TAG + String.valueOf(patternNumber));
  +                    String stop =
  +                        (String) patterns.get(
  +                            STOP + String.valueOf(patternNumber));
  +                    String tagNum =
  +                        (String) patterns.get(
  +                            TAGNUM + String.valueOf(patternNumber));
  +                    // A group of params can have a specific url
  +                    String url =
  +                        (String) patterns.get(
  +                            URL + String.valueOf(patternNumber));
  +                    url = controlUrl(url, defaultUrl);
  +                    htmlReader = getReader(url);
  +
  +                    if ((start != null) && (stop == null))
  +                    {
  +                        element = new JetspeedClearElement(BAD_PARAM);
  +                        return element;
  +                    }
  +
  +                    if (tagNum != null)
  +                    {
  +                        try
  +                        {
  +                            tagNumber = Integer.parseInt(tagNum);
  +                        }
  +                        catch (NumberFormatException e)
  +                        {
  +                            logger.info("Exception occurred:" + e.toString());
  +                            e.printStackTrace();
  +                            element = new JetspeedClearElement(BAD_NUMBER);
  +                            return element;
  +                        }
  +                    }
  +
  +                    if ((simpleTag != null) && (tagNum == null))
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.findElement(
  +                                    htmlReader,
  +                                    url,
  +                                    simpleTag);
  +                    else if ((simpleTag != null) && (tagNum != null))
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.findElementNumber(
  +                                    htmlReader,
  +                                    url,
  +                                    simpleTag,
  +                                    tagNumber);
  +                    else if (tagNum == null)
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.clipElements(
  +                                    htmlReader,
  +                                    url,
  +                                    start,
  +                                    stop);
  +                    else if (tagNum != null)
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.clipElementsNumber(
  +                                    htmlReader,
  +                                    url,
  +                                    start,
  +                                    stop,
  +                                    tagNumber);
  +
  +                    patternNumber = patternNumber + 1;
  +                    //Restart Enumeration, because params could not be in the right order
  +                    en = patterns.keys();
  +                    htmlReader.close();
  +                }
  +            }
  +
  +            element = new JetspeedClearElement(clippedString);
  +
  +            //FIXME: We should do a clearContent() for the media type, not ALL media types
  +            this.clearContent();
  +            // doing this because setContent() is not overwriting current content.
  +            this.setContent(element);
  +
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +        }
  +
  +        return element;
  +    }
  +
  +    /**
  +     * Usually called by caching system when portlet is marked as expired, but
  +     * has not be idle longer then TimeToLive.
  +     *
  +     * Any cached content that is expired need to be refreshed.
  +     */
  +    public void refresh()
  +    {
  +        if (cacheContent == true)
  +        {
  +            getWebClippedContent(null, this.getPortletConfig());
  +        }
  +    }
  +
  +    /**
  +     * Select the URL to use for this portlet.
  +     * @return The URL to use for this portlet
  +     */
  +    protected String selectUrl(RunData data, PortletConfig config)
  +    {
  +        String url = config.getURL();
  +        return url;
  +    }
  +
  +    /*
  +     * Choose between a specific url and the default url
  +     */
  +    private String controlUrl(String url, String defaultUrl)
  +    {
  +        if (url == null)
  +        {
  +            return defaultUrl;
  +        }
  +
  +        //if the given URL doesn not include a protocol... ie http:// or ftp://
  +        //then resolve it relative to the current URL context
  +        if (url.indexOf("://") < 0)
  +        {
  +            url = TurbineServlet.getResource(url).toString();
  +        }
  +
  +        return url;
  +    }
  +
  +    /*
  +     * Load portlet parameters
  +     */
  +    private void loadParams() throws PortletException
  +    {
  +        Iterator en = this.getPortletConfig().getInitParameterNames();
  +
  +        try
  +        {
  +            while (en.hasNext())
  +            {
  +                String name = (String) en.next();
  +
  +                if (name.equals("username"))
  +                    username =
  +                        this.getPortletConfig().getInitParameter("username");
  +                else if (name.equals("password"))
  +                    password =
  +                        this.getPortletConfig().getInitParameter("password");
  +                else
  +                    patterns.put(
  +                        name,
  +                        this.getPortletConfig().getInitParameter(name));
  +
  +            }
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +            throw new PortletException(e.toString());
  +        }
  +    }
   
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org