You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hc.apache.org by Muhammad Kashif <mk...@clickmarks.com> on 2004/03/06 08:04:41 UTC
upgrade from 0.3 to 2.0
Hi Everybody!
I was using HttpClient v0.3, which doesn't support https.
The newer version 2.0 supports https, but its structure has totally changed,
i.e. I was using the HttpResponse object and also NVPairs from v0.3, but they
are not found in 2.0.
Then I saw patches of HTTPConnection & RedirectionModule for v0.3-3, and I
used them to open https calls.
It works for some sites, e.g.:
=====================
https://adwords.google.com/select/
https://console.website.yahoo.com/
https://console.website.yahoo.com/apps/recoverpassword/index.cgi
but it does not work for the following, due to certificate authentication (the error is
"peer NOT authenticated"):
=====================================================================
https://www.etrade.com <https://www.etrade.com/>
https://www.sun.com/
MY CODE (GREEN LINES ARE interacting with HTTPClient 0.3-3)
====================================================
// Returns an array containing [ String content of web site, Document
object ]
/**
 * Downloads the page at url through HTTPClient, following 302 redirects
 * manually, then runs the body through Tidy and post-processes the DOM.
 *
 * NOTE(review): this listing appears to have been hard-wrapped by a mail
 * client; several comments and string literals are split across lines, so
 * it will not compile exactly as shown.
 *
 * @param tidy      parser used for parseDOM and setBaseURL
 * @param url       address to fetch; any query string is split into NVPairs
 * @param sCharset  passed through to getEncoding
 * @param sMode     when equal to REFRESH the method returns right after
 *                  serialising the DOM, before anything is saved
 * @param bFullPage passed through to getBaseHref and
 *                  eliminateUnnecessaryAttributesAndFixHref
 * @return a two-element array of a String and the parsed Document, or null
 *         when the final status code is 300 or above or when any exception
 *         is caught
 */
private Object[] parse(Tidy tidy, String url, String sCharset, String
sMode, boolean bFullPage) {
// NOTE(review): lStartTime is never read again in this method.
long lStartTime = System.currentTimeMillis();
// Reset per-call state that is declared outside this method.
vLinks=new Vector();
counter=0;
// Name built from the current time plus the thread hash code; it is handed
// to the save helpers and to processDOM further down.
String sFileName=new
String(""+System.currentTimeMillis()+Thread.currentThread().hashCode());
ByteArrayOutputStream baos=new ByteArrayOutputStream();
String sOriginalUrl = null;
try
{
boolean bPageNotLoaded = true;
HTTPClient.URI uri = null;
HTTPResponse rsp = null;
// One pass per hop: each iteration issues a single GET and the loop only
// repeats when the status code is exactly 302.
do {
//////Form the nvpairs from the url
NVPair nvpairs[] = null;
int index = url.indexOf("?");
// Remember the URL of this hop including its query string; url itself is
// cut down to the part before the question mark just below.
sOriginalUrl = url;
if ( index >= 0 ) {
Vector v = new Vector();
String params = url.substring(index+1);
url = url.substring(0, index);
StringTokenizer st = new StringTokenizer ( params, "&" );
while ( st.hasMoreTokens() ) {
String token = st.nextToken();
// NOTE(review): a parameter without an equals sign gives index2 == -1, so
// substring(0, -1) throws; the outer catch then turns that into a null
// return for the whole page.
int index2 = token.indexOf("=");
NVPair form_data = new NVPair(token.substring(0, index2),
token.substring(index2+1));
v.add( form_data );
}
// Copy the collected pairs into an array; nvpairs stays null when the query
// string produced no tokens.
if ( v.size() > 0 ) {
int len = v.size();
nvpairs = new NVPair[len];
for ( int i=0; i<len; i++ ) {
nvpairs[i] = (NVPair)v.elementAt(i);
}
}
}
//Use a loop to see if the website is redirecting u
//with a response code of 302 and if it is then go to the new
redirected url
/* Sample header looks like this for www.espn.com
Header:Content-type value=text/html
Header:Location value=http://msn.espn.go.com/
Header:Connection value=close
Header:Date value=Thu, 26 Jun 2003 22:33:21 GMT
Header:Content-length value=0
Header:Server value=Netscape-Enterprise/4.1
*/
// Send new user agent string in header
NVPair[] Headers = new NVPair[1];
NVPair Header = new NVPair("User-Agent", "Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)");
Headers[0] = Header;
uri = new HTTPClient.URI(url);
// Prevents popup from appearing which prompts user to
accept/reject cookie
CookieModule.setCookiePolicyHandler(null);
We do not need to set security provider class if url is not https
if ( url.startsWith("https") ){
// bSSLProviderSet guards the registration so the provider is added only
// the first time an https URL is seen.
if ( ! bSSLProviderSet ) {
// This indicates JSSE module for SSL support
java.security.Security.addProvider(new
com.sun.net.ssl.internal.ssl.Provider());
bSSLProviderSet=true;
}
}
// NOTE(review): local a is assigned but never used; the same expression is
// repeated inline in the Get call below.
String a = uri.getPath().equals("")?"/":uri.getPath();
HTTPConnection con = new HTTPConnection(uri.getScheme(),
uri.getHost(), uri.getPort());
con.setRawMode(true);
// An empty path is requested as the root path.
rsp = con.Get(uri.getPath().equals("")?"/":uri.getPath(),
nvpairs, Headers);
if (rsp.getStatusCode() == 302){
// NOTE(review): enum is a reserved word from Java 5 on. The loop below
// walks the header names and discards each one.
Enumeration enum = rsp.listHeaders();
while ( enum.hasMoreElements() ){
String name = (String)enum.nextElement();
}
// NOTE(review): presumably getHeader yields null when Location is absent,
// which would make trim() throw - confirm against the HTTPClient API.
// There is also no cap on the number of redirects followed.
url = Common.decodeString( rsp.getHeader("Location").trim()
);
}
else
{
bPageNotLoaded = false;
}
}while( bPageNotLoaded );
// Anything still at 300 or above after the loop is reported on stderr and
// the method gives up with null.
if (rsp.getStatusCode() >= 300)
{
System.err.println("Received Error: "+rsp.getReasonLine()+"
code="+rsp.getStatusCode()+" Headers:"+rsp.listHeaders());
return null;
}
else{
// NOTE(review): same null concern as for Location if the response carries
// no Content-Type header - confirm.
if (
rsp.getHeader("Content-Type").toLowerCase().indexOf("image/") >= 0 ){
// Image responses are not parsed as HTML; a small anchor-plus-img snippet
// pointing back at the servlet is built and tidied instead.
String sDocument = "<a
href='"+PortalServlet.SERVLET_NAME+"?x="+JABITAT_PARSER+
"&URL="+Common.encodeString(url)+"&IMAGE=1"+"'
title='"+ResourceText.Strings.get("Common_5")+"'><img src='"+url+"'
border='0' /></a>" ;
// NOTE(review): getBytes() without an argument uses the platform default
// charset, unlike the explicit UTF-8 call in the other branch.
ByteArrayInputStream bais=new
ByteArrayInputStream(sDocument.getBytes());
Document d=tidy.parseDOM(bais, baos);
Common.ensureTitle(d);
return new Object[]{sDocument, d};
}
else{
String str=rsp.getText();
String sEffectiveURI=rsp.getEffectiveURI().toString();
// When getEncoding reports an encoding the text is converted through
// StringTools.toUnicode; otherwise it is used as received.
String sEncoding = getEncoding(tidy, rsp, sCharset,
sEffectiveURI);
String unicodeString = null;
if ( sEncoding != null )
unicodeString = StringTools.toUnicode(str, sEncoding);
else
unicodeString = str;
ByteArrayInputStream bais=new
ByteArrayInputStream(unicodeString.getBytes("UTF-8"));
tidy.setBaseURL(sEffectiveURI);
Document d=tidy.parseDOM(bais, baos);
Common.ensureTitle(d);
if ( sEncoding != null )
changeEncodingOfPage ( d );
// DOM clean-up passes, applied in this order before the tree is serialised.
stripUnwantedTags (d);
String baseHref = getBaseHref (d, url, sEffectiveURI,
bFullPage);
eliminateUnnecessaryAttributesAndFixHref (d, baseHref,
uri.getScheme()+"://"+uri.getHost(), bFullPage);
putElementIdentifierInEachTag(d);
String
sDOMString=JabitatDomPrinter.printToString(d.getDocumentElement(), new
JabitatDomPrinter.XMLEscaper());
RequestInfo rinfo=RequestHandler.getRequestInfo();
// REFRESH callers get the serialised DOM back without anything being saved.
if ( sMode != null && sMode.equals("REFRESH") ) {
return new Object[]{sDOMString, d};
}
saveAtagsToVector(d, sFileName);
saveVectorToFile(sDOMString, sOriginalUrl, sFileName);
Common.cleanupDirectory(getParserFilesDir());
// When the RB request parameter equals FP the method returns a header block
// with a link back to the servlet instead of the processed DOM text.
if ( rinfo.getRequestParameter("RB") != null &&
rinfo.getRequestParameter("RB").toString().equals("FP") ) {
String sParams = null;
String sHeader = null;
if ( rinfo.getRequestParameter("FROM") == null ){
sHeader =
(String)ResourceText.Strings.get("InternetIntranet_6");
sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&FID="+sFileName+"
&x="+JABITAT_PARSER;
}
else{
sHeader =
(String)ResourceText.Strings.get("SourceContent_1");
// NOTE(review): the lone comment marker below looks like the start of a
// commented-out sParams assignment that mail wrapping pushed onto the
// following lines; either way the assignment after it overwrites sParams.
//
sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&CID="+sFileName+"
&ACTION=AP&MODE=ACTIVEX&SHOW=3&x="+JABITAT_PARSER;
sParams="x="+JABITAT_PARSER+"&ACTION=AP&MODE=ACTIVEX&SHOW=3&CID="+sFileN
ame+"&FID="+sFileName+"&URL="+Common.encodeString(url)+"&STEP=2&RB=FP";
}
return new Object[]{Common.print_header(sHeader,
(String)ResourceText.Strings.get("Common_5")+"<br/><a
class='p2'
href='"+PortalServlet.SERVLET_NAME+"?"+sParams+"'>"+url+"</a>")
, d};
}
else
return new Object[]{processDOM(d, sFileName), d};
}
}
}
// Every failure is reported three ways (stack trace, Logger, stderr) and
// then falls through to the final return null.
catch (Exception e)
{
e.printStackTrace();
Logger.error("JabitatParser", e);
System.err.println("Error handling request: " + e.getMessage());
}
return null;
}
BottomLINE
=========
1) Is there any way to open those URLs as well by using this patch of v0.3-3?
2) Is there any HttpClient version with HttpResponse & NVPair
classes which supports https (so that it is compatible with my existing code)?
3) Or, if I have to upgrade to v2.0, then which classes & methods do I
have to use to replace the NVPair & HTTPResponse usage?
Thanks
Kashif
Re: upgrade from 0.3 to 2.0
Posted by Michael Becke <be...@u.washington.edu>.
Hi Kashif,
I think you may be confusing commons HttpClient (us) with
http://www.innovation.ch/java/HTTPClient/ (not us). These two projects
are not at all related, other than the obvious name similarity.
Mike
On Mar 6, 2004, at 2:04 AM, Muhammad Kashif wrote:
> Hi Everybody!
>
> I was using HttpClient V0.3 , which doesn't support https.
> The newer version 2.0 supports https but its structure totally changed,
> i.e I was using HttpResponse object also NVPairs from v0.3 , but they
> are not found in 2.0.
> Than I saw patches of HTTPConnection & RedirectionModule for v0.3-3 I
> used them to open https calls.
>
> It works for some sites e.g
> =====================
> https://adwords.google.com/select/
> https://console.website.yahoo.com/
> https://console.website.yahoo.com/apps/recoverpassword/index.cgi
>
>
>
> but not work for follwing due to authenticating certificates(Error is
> peer NOT authenticated.)
> =====================================================================
> https://www.etrade.com <https://www.etrade.com/>
> https://www.sun.com/
>
>
>
>
> MY CODE (GREEN LINES ARE interacting with HTTPCLient 0.3-3)
> ====================================================
> // Returns an array containing [ String content of web site, Document
> object ]
> private Object[] parse(Tidy tidy, String url, String sCharset,
> String
> sMode, boolean bFullPage) {
>
>
> long lStartTime = System.currentTimeMillis();
>
> vLinks=new Vector();
> counter=0;
> String sFileName=new
> String(""+System.currentTimeMillis()+Thread.currentThread().hashCode())
> ;
> ByteArrayOutputStream baos=new ByteArrayOutputStream();
> String sOriginalUrl = null;
> try
> {
>
> boolean bPageNotLoaded = true;
> HTTPClient.URI uri = null;
> HTTPResponse rsp = null;
> do {
> //////Form the nvpairs from the url
> NVPair nvpairs[] = null;
> int index = url.indexOf("?");
> sOriginalUrl = url;
> if ( index >= 0 ) {
> Vector v = new Vector();
> String params = url.substring(index+1);
> url = url.substring(0, index);
> StringTokenizer st = new StringTokenizer ( params, "&" );
> while ( st.hasMoreTokens() ) {
> String token = st.nextToken();
> int index2 = token.indexOf("=");
> NVPair form_data = new NVPair(token.substring(0, index2),
> token.substring(index2+1));
> v.add( form_data );
> }
> if ( v.size() > 0 ) {
> int len = v.size();
> nvpairs = new NVPair[len];
> for ( int i=0; i<len; i++ ) {
> nvpairs[i] = (NVPair)v.elementAt(i);
> }
> }
> }
>
> //Use a loop to see if the website is redirecting u
> //with a response code of 302 and if it is then go to the new
> redirected url
> /* Sample header looks like this for www.espn.com
> Header:Content-type value=text/html
> Header:Location value=http://msn.espn.go.com/
> Header:Connection value=close
> Header:Date value=Thu, 26 Jun 2003 22:33:21 GMT
> Header:Content-length value=0
> Header:Server value=Netscape-Enterprise/4.1
> */
>
> // Send new user agent string in header
> NVPair[] Headers = new NVPair[1];
> NVPair Header = new NVPair("User-Agent", "Mozilla/4.0
> (compatible; MSIE 6.0; Windows NT 5.0)");
> Headers[0] = Header;
>
> uri = new HTTPClient.URI(url);
> // Prevents popup from appearing which prompts user to
> accept/reject cookie
> CookieModule.setCookiePolicyHandler(null);
>
> We do not need to set security provider class if url is not https
> if ( url.startsWith("https") ){
>
> if ( ! bSSLProviderSet ) {
> // This indicates JSSE module for SSL support
> java.security.Security.addProvider(new
> com.sun.net.ssl.internal.ssl.Provider());
> bSSLProviderSet=true;
> }
> }
>
>
> String a = uri.getPath().equals("")?"/":uri.getPath();
>
> HTTPConnection con = new HTTPConnection(uri.getScheme(),
> uri.getHost(), uri.getPort());
> con.setRawMode(true);
> rsp = con.Get(uri.getPath().equals("")?"/":uri.getPath(),
> nvpairs, Headers);
>
> if (rsp.getStatusCode() == 302){
> Enumeration enum = rsp.listHeaders();
> while ( enum.hasMoreElements() ){
> String name = (String)enum.nextElement();
> }
> url = Common.decodeString( rsp.getHeader("Location").trim()
> );
> }
> else
> {
> bPageNotLoaded = false;
> }
>
> }while( bPageNotLoaded );
>
> if (rsp.getStatusCode() >= 300)
> {
> System.err.println("Received Error: "+rsp.getReasonLine()+"
> code="+rsp.getStatusCode()+" Headers:"+rsp.listHeaders());
> return null;
> }
> else{
> if (
> rsp.getHeader("Content-Type").toLowerCase().indexOf("image/") >= 0 ){
> String sDocument = "<a
> href='"+PortalServlet.SERVLET_NAME+"?x="+JABITAT_PARSER+
> "&URL="+Common.encodeString(url)+"&IMAGE=1"+"'
> title='"+ResourceText.Strings.get("Common_5")+"'><img src='"+url+"'
> border='0' /></a>" ;
>
> ByteArrayInputStream bais=new
> ByteArrayInputStream(sDocument.getBytes());
> Document d=tidy.parseDOM(bais, baos);
> Common.ensureTitle(d);
> return new Object[]{sDocument, d};
> }
> else{
> String str=rsp.getText();
> String sEffectiveURI=rsp.getEffectiveURI().toString();
>
> String sEncoding = getEncoding(tidy, rsp, sCharset,
> sEffectiveURI);
> String unicodeString = null;
> if ( sEncoding != null )
> unicodeString = StringTools.toUnicode(str, sEncoding);
> else
> unicodeString = str;
>
> ByteArrayInputStream bais=new
> ByteArrayInputStream(unicodeString.getBytes("UTF-8"));
>
> tidy.setBaseURL(sEffectiveURI);
> Document d=tidy.parseDOM(bais, baos);
>
> Common.ensureTitle(d);
> if ( sEncoding != null )
> changeEncodingOfPage ( d );
>
> stripUnwantedTags (d);
> String baseHref = getBaseHref (d, url, sEffectiveURI,
> bFullPage);
>
> eliminateUnnecessaryAttributesAndFixHref (d, baseHref,
> uri.getScheme()+"://"+uri.getHost(), bFullPage);
> putElementIdentifierInEachTag(d);
> String
> sDOMString=JabitatDomPrinter.printToString(d.getDocumentElement(), new
> JabitatDomPrinter.XMLEscaper());
>
> RequestInfo rinfo=RequestHandler.getRequestInfo();
> if ( sMode != null && sMode.equals("REFRESH") ) {
> return new Object[]{sDOMString, d};
> }
>
> saveAtagsToVector(d, sFileName);
> saveVectorToFile(sDOMString, sOriginalUrl, sFileName);
> Common.cleanupDirectory(getParserFilesDir());
>
> if ( rinfo.getRequestParameter("RB") != null &&
> rinfo.getRequestParameter("RB").toString().equals("FP") ) {
> String sParams = null;
> String sHeader = null;
> if ( rinfo.getRequestParameter("FROM") == null ){
> sHeader =
> (String)ResourceText.Strings.get("InternetIntranet_6");
>
> sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&FID="+sFileName+
> "
> &x="+JABITAT_PARSER;
> }
> else{
> sHeader =
> (String)ResourceText.Strings.get("SourceContent_1");
> //
> sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&CID="+sFileName+
> "
> &ACTION=AP&MODE=ACTIVEX&SHOW=3&x="+JABITAT_PARSER;
>
> sParams="x="+JABITAT_PARSER+"&ACTION=AP&MODE=ACTIVEX&SHOW=3&CID="+sFile
> N
> ame+"&FID="+sFileName+"&URL="+Common.encodeString(url)+"&STEP=2&RB=FP";
> }
>
> return new Object[]{Common.print_header(sHeader,
> (String)ResourceText.Strings.get("Common_5")+"<br/><a
> class='p2'
> href='"+PortalServlet.SERVLET_NAME+"?"+sParams+"'>"+url+"</a>")
> , d};
> }
> else
> return new Object[]{processDOM(d, sFileName), d};
> }
> }
> }
> catch (Exception e)
> {
> e.printStackTrace();
> Logger.error("JabitatParser", e);
> System.err.println("Error handling request: " +
> e.getMessage());
> }
> return null;
> }
>
>
>
> BottomLINE
> =========
>
> 1) is any way to open those url also by using this pacth of
> v0.3-3
> ?
> 2) is there any httpclient version with Httpresponse & NVpair
> classes which supports https ? (so compatible with my existing code)
> 3) or if I have to upgrade to v2.0 than which classes & methods I
> have to use to replace NVpairs & HTTPResponse usage?
>
>
> Thanks
> Kashif
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-httpclient-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-httpclient-dev-help@jakarta.apache.org