You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hc.apache.org by Muhammad Kashif <mk...@clickmarks.com> on 2004/03/06 08:04:41 UTC

upgrade from 0.3 to 2.0

Hi Everybody!
 
I was using HTTPClient V0.3, which doesn't support https.
The newer version 2.0 supports https, but its structure has totally changed,
i.e. I was using the HTTPResponse object and NVPairs from v0.3, but they
are not found in 2.0.
Then I saw the patches of HTTPConnection & RedirectionModule for v0.3-3 and
used them to make https calls.
 
It works for some sites e.g
=====================
https://adwords.google.com/select/
https://console.website.yahoo.com/ 
https://console.website.yahoo.com/apps/recoverpassword/index.cgi
 
 
 
but it does not work for the following sites, because the certificates cannot
be authenticated (the error is "peer NOT authenticated"):
=====================================================================
https://www.etrade.com <https://www.etrade.com/> 
https://www.sun.com/
 
 
 
 
MY CODE (GREEN LINES ARE INTERACTING WITH HTTPClient 0.3-3)
====================================================
  // Returns an array containing [ String content of web site, Document
object ]
   private Object[] parse(Tidy tidy, String url, String sCharset, String
sMode, boolean bFullPage) {
 
         
     long lStartTime = System.currentTimeMillis();
 
     vLinks=new Vector();
     counter=0;
     String sFileName=new
String(""+System.currentTimeMillis()+Thread.currentThread().hashCode());
     ByteArrayOutputStream baos=new ByteArrayOutputStream();
     String sOriginalUrl = null;
     try
     {
 
       boolean bPageNotLoaded = true;
       HTTPClient.URI uri = null;
       HTTPResponse rsp = null;
       do {
         //////Form the nvpairs from the url
         NVPair nvpairs[] = null;
         int index = url.indexOf("?");
         sOriginalUrl = url;
         if ( index >= 0 ) {
            Vector v = new Vector();
            String params = url.substring(index+1);
            url = url.substring(0, index);
            StringTokenizer st = new StringTokenizer ( params, "&" );
            while ( st.hasMoreTokens() ) {
              String token = st.nextToken();
              int index2 = token.indexOf("=");
              NVPair form_data = new NVPair(token.substring(0, index2),
token.substring(index2+1));
              v.add( form_data );
            }
            if ( v.size() > 0 ) {
               int len = v.size();
               nvpairs = new NVPair[len];
               for ( int i=0; i<len; i++ ) {
                  nvpairs[i] = (NVPair)v.elementAt(i);
               }
            }
         }
 
         //Use a loop to see if the website is redirecting u
         //with a response code of 302 and if it is then go to the new
redirected url
         /* Sample header looks like this for www.espn.com
         Header:Content-type  value=text/html
         Header:Location  value=http://msn.espn.go.com/
         Header:Connection  value=close
         Header:Date  value=Thu, 26 Jun 2003 22:33:21 GMT
         Header:Content-length  value=0
         Header:Server  value=Netscape-Enterprise/4.1
         */
 
         // Send new user agent string in header
         NVPair[] Headers = new NVPair[1];
         NVPair Header = new NVPair("User-Agent", "Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)");
         Headers[0] = Header;
 
         uri = new HTTPClient.URI(url);
         // Prevents popup from appearing which prompts user to
accept/reject cookie
         CookieModule.setCookiePolicyHandler(null);
 
    We do not need to set security provider class if url is not https
         if ( url.startsWith("https") ){
    
           if ( ! bSSLProviderSet ) {
             // This indicates JSSE module for SSL support
                 java.security.Security.addProvider(new
com.sun.net.ssl.internal.ssl.Provider());
             bSSLProviderSet=true;
           }
         }
 
 
      String a =   uri.getPath().equals("")?"/":uri.getPath();
 
         HTTPConnection con = new HTTPConnection(uri.getScheme(),
uri.getHost(), uri.getPort());
          con.setRawMode(true);
         rsp = con.Get(uri.getPath().equals("")?"/":uri.getPath(),
nvpairs, Headers);
 
         if (rsp.getStatusCode() == 302){
           Enumeration enum = rsp.listHeaders();
           while ( enum.hasMoreElements() ){
             String name = (String)enum.nextElement();
           }
           url = Common.decodeString( rsp.getHeader("Location").trim()
);
         }
         else
         {
           bPageNotLoaded = false;
         }
 
       }while( bPageNotLoaded );
 
       if (rsp.getStatusCode() >= 300)
       {
         System.err.println("Received Error: "+rsp.getReasonLine()+"
code="+rsp.getStatusCode()+"   Headers:"+rsp.listHeaders());
         return null;
       }
       else{
         if (
rsp.getHeader("Content-Type").toLowerCase().indexOf("image/") >= 0 ){
           String sDocument = "<a
href='"+PortalServlet.SERVLET_NAME+"?x="+JABITAT_PARSER+
           "&URL="+Common.encodeString(url)+"&IMAGE=1"+"'
title='"+ResourceText.Strings.get("Common_5")+"'><img src='"+url+"'
border='0' /></a>" ;
 
           ByteArrayInputStream bais=new
ByteArrayInputStream(sDocument.getBytes());
           Document d=tidy.parseDOM(bais, baos);
           Common.ensureTitle(d);
           return new Object[]{sDocument, d};
         }
         else{
           String str=rsp.getText();
           String sEffectiveURI=rsp.getEffectiveURI().toString();
 
           String sEncoding = getEncoding(tidy, rsp, sCharset,
sEffectiveURI);
           String unicodeString = null;
           if ( sEncoding != null )
             unicodeString = StringTools.toUnicode(str, sEncoding);
           else
             unicodeString = str;
 
           ByteArrayInputStream bais=new
ByteArrayInputStream(unicodeString.getBytes("UTF-8"));
 
           tidy.setBaseURL(sEffectiveURI);
           Document d=tidy.parseDOM(bais, baos);
 
           Common.ensureTitle(d);
           if ( sEncoding != null )
               changeEncodingOfPage ( d );
 
           stripUnwantedTags (d);
           String baseHref = getBaseHref (d, url, sEffectiveURI,
bFullPage);
 
           eliminateUnnecessaryAttributesAndFixHref (d, baseHref,
uri.getScheme()+"://"+uri.getHost(), bFullPage);
           putElementIdentifierInEachTag(d);
           String
sDOMString=JabitatDomPrinter.printToString(d.getDocumentElement(), new
JabitatDomPrinter.XMLEscaper());
 
           RequestInfo rinfo=RequestHandler.getRequestInfo();
           if ( sMode != null && sMode.equals("REFRESH") ) {
            return new Object[]{sDOMString, d};
           }
 
           saveAtagsToVector(d, sFileName);
           saveVectorToFile(sDOMString, sOriginalUrl, sFileName);
           Common.cleanupDirectory(getParserFilesDir());
 
           if ( rinfo.getRequestParameter("RB") != null &&
rinfo.getRequestParameter("RB").toString().equals("FP") ) {
             String sParams = null;
             String sHeader = null;
             if ( rinfo.getRequestParameter("FROM") == null ){
               sHeader =
(String)ResourceText.Strings.get("InternetIntranet_6");
 
sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&FID="+sFileName+"
&x="+JABITAT_PARSER;
             }
             else{
               sHeader =
(String)ResourceText.Strings.get("SourceContent_1");
//
sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&CID="+sFileName+"
&ACTION=AP&MODE=ACTIVEX&SHOW=3&x="+JABITAT_PARSER;
 
sParams="x="+JABITAT_PARSER+"&ACTION=AP&MODE=ACTIVEX&SHOW=3&CID="+sFileN
ame+"&FID="+sFileName+"&URL="+Common.encodeString(url)+"&STEP=2&RB=FP";
             }
 
             return new Object[]{Common.print_header(sHeader,
               (String)ResourceText.Strings.get("Common_5")+"<br/><a
class='p2'
href='"+PortalServlet.SERVLET_NAME+"?"+sParams+"'>"+url+"</a>")
               , d};
           }
           else
             return new Object[]{processDOM(d, sFileName), d};
         }
       }
     }
     catch (Exception e)
     {
       e.printStackTrace();
        Logger.error("JabitatParser", e);
        System.err.println("Error handling request: " + e.getMessage());
     }
     return null;
   }
 
 
 
BottomLINE
========= 
 
1)       Is there any way to open those URLs as well using this patch of
v0.3-3?
2)       Is there any HTTPClient version with the HTTPResponse & NVPair
classes that supports https (so that it is compatible with my existing code)?
3)       Or, if I have to upgrade to v2.0, then which classes & methods do I
have to use to replace the NVPair & HTTPResponse usage?
 
 
Thanks
Kashif

Re: upgrade from 0.3 to 2.0

Posted by Michael Becke <be...@u.washington.edu>.
Hi Kashif,

I think you may be confusing commons HttpClient (us) with  
http://www.innovation.ch/java/HTTPClient/ (not us).  These two projects  
are not at all related, other than the obvious name similarity.

Mike

On Mar 6, 2004, at 2:04 AM, Muhammad Kashif wrote:

> Hi Everybody!
>
> I was using HttpClient V0.3 , which doesn't support https.
> The newer version 2.0 supports https but its structure totally changed,
> i.e I was using HttpResponse object also NVPairs from v0.3 , but they
> are not found in 2.0.
> Than I saw patches of HTTPConnection & RedirectionModule for v0.3-3 I
> used them to open https calls.
>
> It works for some sites e.g
> =====================
> https://adwords.google.com/select/
> https://console.website.yahoo.com/
> https://console.website.yahoo.com/apps/recoverpassword/index.cgi
>
>
>
> but not work for follwing due to authenticating certificates(Error is
> peer NOT authenticated.)
> =====================================================================
> https://www.etrade.com <https://www.etrade.com/>
> https://www.sun.com/
>
>
>
>
> MY CODE (GREEN LINES ARE interacting with HTTPCLient 0.3-3)
> ====================================================
>   // Returns an array containing [ String content of web site, Document
> object ]
>    private Object[] parse(Tidy tidy, String url, String sCharset,  
> String
> sMode, boolean bFullPage) {
>
>
>      long lStartTime = System.currentTimeMillis();
>
>      vLinks=new Vector();
>      counter=0;
>      String sFileName=new
> String(""+System.currentTimeMillis()+Thread.currentThread().hashCode()) 
> ;
>      ByteArrayOutputStream baos=new ByteArrayOutputStream();
>      String sOriginalUrl = null;
>      try
>      {
>
>        boolean bPageNotLoaded = true;
>        HTTPClient.URI uri = null;
>        HTTPResponse rsp = null;
>        do {
>          //////Form the nvpairs from the url
>          NVPair nvpairs[] = null;
>          int index = url.indexOf("?");
>          sOriginalUrl = url;
>          if ( index >= 0 ) {
>             Vector v = new Vector();
>             String params = url.substring(index+1);
>             url = url.substring(0, index);
>             StringTokenizer st = new StringTokenizer ( params, "&" );
>             while ( st.hasMoreTokens() ) {
>               String token = st.nextToken();
>               int index2 = token.indexOf("=");
>               NVPair form_data = new NVPair(token.substring(0, index2),
> token.substring(index2+1));
>               v.add( form_data );
>             }
>             if ( v.size() > 0 ) {
>                int len = v.size();
>                nvpairs = new NVPair[len];
>                for ( int i=0; i<len; i++ ) {
>                   nvpairs[i] = (NVPair)v.elementAt(i);
>                }
>             }
>          }
>
>          //Use a loop to see if the website is redirecting u
>          //with a response code of 302 and if it is then go to the new
> redirected url
>          /* Sample header looks like this for www.espn.com
>          Header:Content-type  value=text/html
>          Header:Location  value=http://msn.espn.go.com/
>          Header:Connection  value=close
>          Header:Date  value=Thu, 26 Jun 2003 22:33:21 GMT
>          Header:Content-length  value=0
>          Header:Server  value=Netscape-Enterprise/4.1
>          */
>
>          // Send new user agent string in header
>          NVPair[] Headers = new NVPair[1];
>          NVPair Header = new NVPair("User-Agent", "Mozilla/4.0
> (compatible; MSIE 6.0; Windows NT 5.0)");
>          Headers[0] = Header;
>
>          uri = new HTTPClient.URI(url);
>          // Prevents popup from appearing which prompts user to
> accept/reject cookie
>          CookieModule.setCookiePolicyHandler(null);
>
>     We do not need to set security provider class if url is not https
>          if ( url.startsWith("https") ){
>
>            if ( ! bSSLProviderSet ) {
>              // This indicates JSSE module for SSL support
>                  java.security.Security.addProvider(new
> com.sun.net.ssl.internal.ssl.Provider());
>              bSSLProviderSet=true;
>            }
>          }
>
>
>       String a =   uri.getPath().equals("")?"/":uri.getPath();
>
>          HTTPConnection con = new HTTPConnection(uri.getScheme(),
> uri.getHost(), uri.getPort());
>           con.setRawMode(true);
>          rsp = con.Get(uri.getPath().equals("")?"/":uri.getPath(),
> nvpairs, Headers);
>
>          if (rsp.getStatusCode() == 302){
>            Enumeration enum = rsp.listHeaders();
>            while ( enum.hasMoreElements() ){
>              String name = (String)enum.nextElement();
>            }
>            url = Common.decodeString( rsp.getHeader("Location").trim()
> );
>          }
>          else
>          {
>            bPageNotLoaded = false;
>          }
>
>        }while( bPageNotLoaded );
>
>        if (rsp.getStatusCode() >= 300)
>        {
>          System.err.println("Received Error: "+rsp.getReasonLine()+"
> code="+rsp.getStatusCode()+"   Headers:"+rsp.listHeaders());
>          return null;
>        }
>        else{
>          if (
> rsp.getHeader("Content-Type").toLowerCase().indexOf("image/") >= 0 ){
>            String sDocument = "<a
> href='"+PortalServlet.SERVLET_NAME+"?x="+JABITAT_PARSER+
>            "&URL="+Common.encodeString(url)+"&IMAGE=1"+"'
> title='"+ResourceText.Strings.get("Common_5")+"'><img src='"+url+"'
> border='0' /></a>" ;
>
>            ByteArrayInputStream bais=new
> ByteArrayInputStream(sDocument.getBytes());
>            Document d=tidy.parseDOM(bais, baos);
>            Common.ensureTitle(d);
>            return new Object[]{sDocument, d};
>          }
>          else{
>            String str=rsp.getText();
>            String sEffectiveURI=rsp.getEffectiveURI().toString();
>
>            String sEncoding = getEncoding(tidy, rsp, sCharset,
> sEffectiveURI);
>            String unicodeString = null;
>            if ( sEncoding != null )
>              unicodeString = StringTools.toUnicode(str, sEncoding);
>            else
>              unicodeString = str;
>
>            ByteArrayInputStream bais=new
> ByteArrayInputStream(unicodeString.getBytes("UTF-8"));
>
>            tidy.setBaseURL(sEffectiveURI);
>            Document d=tidy.parseDOM(bais, baos);
>
>            Common.ensureTitle(d);
>            if ( sEncoding != null )
>                changeEncodingOfPage ( d );
>
>            stripUnwantedTags (d);
>            String baseHref = getBaseHref (d, url, sEffectiveURI,
> bFullPage);
>
>            eliminateUnnecessaryAttributesAndFixHref (d, baseHref,
> uri.getScheme()+"://"+uri.getHost(), bFullPage);
>            putElementIdentifierInEachTag(d);
>            String
> sDOMString=JabitatDomPrinter.printToString(d.getDocumentElement(), new
> JabitatDomPrinter.XMLEscaper());
>
>            RequestInfo rinfo=RequestHandler.getRequestInfo();
>            if ( sMode != null && sMode.equals("REFRESH") ) {
>             return new Object[]{sDOMString, d};
>            }
>
>            saveAtagsToVector(d, sFileName);
>            saveVectorToFile(sDOMString, sOriginalUrl, sFileName);
>            Common.cleanupDirectory(getParserFilesDir());
>
>            if ( rinfo.getRequestParameter("RB") != null &&
> rinfo.getRequestParameter("RB").toString().equals("FP") ) {
>              String sParams = null;
>              String sHeader = null;
>              if ( rinfo.getRequestParameter("FROM") == null ){
>                sHeader =
> (String)ResourceText.Strings.get("InternetIntranet_6");
>
> sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&FID="+sFileName+ 
> "
> &x="+JABITAT_PARSER;
>              }
>              else{
>                sHeader =
> (String)ResourceText.Strings.get("SourceContent_1");
> //
> sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&CID="+sFileName+ 
> "
> &ACTION=AP&MODE=ACTIVEX&SHOW=3&x="+JABITAT_PARSER;
>
> sParams="x="+JABITAT_PARSER+"&ACTION=AP&MODE=ACTIVEX&SHOW=3&CID="+sFile 
> N
> ame+"&FID="+sFileName+"&URL="+Common.encodeString(url)+"&STEP=2&RB=FP";
>              }
>
>              return new Object[]{Common.print_header(sHeader,
>                (String)ResourceText.Strings.get("Common_5")+"<br/><a
> class='p2'
> href='"+PortalServlet.SERVLET_NAME+"?"+sParams+"'>"+url+"</a>")
>                , d};
>            }
>            else
>              return new Object[]{processDOM(d, sFileName), d};
>          }
>        }
>      }
>      catch (Exception e)
>      {
>        e.printStackTrace();
>         Logger.error("JabitatParser", e);
>         System.err.println("Error handling request: " +  
> e.getMessage());
>      }
>      return null;
>    }
>
>
>
> BottomLINE
> =========
>
> 1)       is any way to open those url also by using this pacth of  
> v0.3-3
> ?
> 2)       is there any httpclient version with Httpresponse & NVpair
> classes which supports https ? (so compatible with my existing code)
> 3)       or if I have to upgrade to v2.0 than which classes & methods I
> have to use to replace NVpairs & HTTPResponse usage?
>
>
> Thanks
> Kashif


---------------------------------------------------------------------
To unsubscribe, e-mail: commons-httpclient-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-httpclient-dev-help@jakarta.apache.org