You are viewing a plain text version of this content. The canonical link for it is here.
Posted to taglibs-dev@jakarta.apache.org by ca...@apache.org on 2002/04/30 23:54:01 UTC

cvs commit: jakarta-taglibs/scrape/xml scrape.xml

catlett     02/04/30 14:54:01

  Modified:    scrape/xml scrape.xml
  Log:
  updated documentation to address use when scraping through a proxy
  
  Revision  Changes    Path
  1.3       +113 -6    jakarta-taglibs/scrape/xml/scrape.xml
  
  Index: scrape.xml
  ===================================================================
  RCS file: /home/cvs/jakarta-taglibs/scrape/xml/scrape.xml,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- scrape.xml	26 Nov 2001 17:50:51 -0000	1.2
  +++ scrape.xml	30 Apr 2002 21:54:01 -0000	1.3
  @@ -128,14 +128,69 @@
           <name>time</name>
           <required>no</required>
           <rtexprvalue>no</rtexprvalue>
  -        <description>The length of time the JSP waits before attempting
  +        <description>
  +          The length of time the JSP waits before attempting
             to rescrape the document. The value of time is specified in minutes. 
             The minimum value is 10 minutes. Note that the minimum value is used 
   	  if a time attribute is not specified.
   	</description>
           <availability>1.0</availability>
         </attribute>
  +
  +      <attribute>
  +        <name>useProxy</name>
  +        <required>no</required>
  +        <rtexprvalue>no</rtexprvalue>
  +        <description>
  +	  Tells the taglib to use a proxy for the connection.  The name and port of
  +          the proxy server will be retrieved from the system properties 
  +          http.proxyHost and http.proxyPort.  This attribute is not necessary if 
  +          setting the name and port with the proxyServer and proxyPort attributes. 
  +	</description>
  +        <availability>1.0</availability>
  +      </attribute>
  +
  +      <attribute>
  +        <name>proxyServer</name>
  +        <required>no</required>
  +        <rtexprvalue>no</rtexprvalue>
  +        <description>
  +	  The name of the proxy server to use. 
  +	</description>
  +        <availability>1.0</availability>
  +      </attribute>
           
  +      <attribute>
  +        <name>proxyPort</name>
  +        <required>no</required>
  +        <rtexprvalue>no</rtexprvalue>
  +        <description>
  +	  The number of the port to use to connect to the proxy server. 
  +          Defaults to 3128.
  +	</description>
  +        <availability>1.0</availability>
  +      </attribute>
  +
  +      <attribute>
  +        <name>proxyName</name>
  +        <required>no</required>
  +        <rtexprvalue>no</rtexprvalue>
  +        <description>
  +	  The username for authentication to the proxy server.
  +	</description>
  +        <availability>1.0</availability>
  +      </attribute>
  +        
  +      <attribute>
  +        <name>proxyPass</name>
  +        <required>no</required>
  +        <rtexprvalue>no</rtexprvalue>
  +        <description>
  +	  The password for authentication to the proxy server.
  +	</description>
  +        <availability>1.0</availability>
  +      </attribute>
  +
         <example>
           <usage>
             <comment>
  @@ -151,6 +206,58 @@
             </code>  
           </usage>  
         </example>
  +      <example>
  +        <usage>
  +          <comment>
  +            Specify a document to be scraped with a connection that must be made
  +            through a proxy on a port other than the default 3128.
  +	    Note that a scrape tag must be nested within the body of the page tag.
  +          </comment>
  +          <code>
  +<![CDATA[ 
  +<scrp:page url="http://finance.yahoo.com/q?s=SUNW" proxyServer="proxy.server"
  +proxyPort="3129">
  +   <scrp:scrape id="qt" begin="<table border=1" end="</table>" anchors="true"/>
  +</scrp:page>
  +]]>       
  +          </code>  
  +        </usage>  
  +      </example>
  +      <example>
  +        <usage>
  +          <comment>
  +            Specify a document to be scraped with a connection that must be made
  +            through a proxy.  Use the Java system properties http.proxyHost and
  +            http.proxyPort.
  +	    Note that a scrape tag must be nested within the body of the page tag.
  +          </comment>
  +          <code>
  +<![CDATA[ 
  +<scrp:page url="http://finance.yahoo.com/q?s=SUNW" useProxy="true">
  +   <scrp:scrape id="qt" begin="<table border=1" end="</table>" anchors="true"/>
  +</scrp:page>
  +]]>       
  +          </code>  
  +        </usage>  
  +      </example>
  +      <example>
  +        <usage>
  +          <comment>
  +            Specify a document to be scraped with a connection that must be made
  +            through a proxy on a port other than the default 3128.  The proxy server
  +            requires authentication.
  +	    Note that a scrape tag must be nested within the body of the page tag.
  +          </comment>
  +          <code>
  +<![CDATA[ 
  +<scrp:page url="http://finance.yahoo.com/q?s=SUNW" proxyServer="proxy.server"
  +proxyPort="3129" proxyName="foo" proxyPass="foobar">
  +   <scrp:scrape id="qt" begin="<table border=1" end="</table>" anchors="true"/>
  +</scrp:page>
  +]]>       
  +          </code>  
  +        </usage>  
  +      </example>
             
       </tag>
   
  @@ -188,11 +295,7 @@
   </scrp:page>
   ]]>               
             </code>
  -	  <comment>
  -	      It is possible to use another tag set nested within the url tag to
  -	      dynamically generate the URL.
  -	  </comment>
  -        </usage>
  +       </usage>
         </example>
   
       </tag>
  @@ -297,6 +400,10 @@
   </scrp:page>
   ]]>
             </code>
  +        </usage>
  +      </example>
  +      <example>
  +        <usage>
   	  <comment>
   	    Set a scrape on a page with results set to have no tags.
   	  </comment>
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>