You are viewing a plain text version of this content. The canonical link for it is here.
Posted to httpclient-users@hc.apache.org by Eugene Dzhurinsky <bo...@redwerk.com> on 2006/07/20 17:04:04 UTC
handling uri with non-latin1 charachters
Hello
I facing strange problem with the URI:
http://www.vu.lt/site_files/DRS/PRAц┘б═YMAS REZIDENTURAI 2006.doc.
While it works fine being pasted in browser, the HTTPClient doesn't handle it
poperly
==============================================================================
import java.io.IOException;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.HttpVersion;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
public class HTTPClientTest extends TestCase {
private static Logger log = Logger.getLogger(HTTPClientTest.class);
HttpClient client;
GetMethod get;
String url;
protected void setUp() throws Exception {
client = new HttpClient();
client.getParams().setBooleanParameter(
"http.protocol.single-cookie-header", true);
url = "http://www.vu.lt/site_files/DRS/PRAц┘б═YMAS REZIDENTURAI 2006.doc";
BasicConfigurator.configure();
log.setLevel(Level.DEBUG);
}
public void testResultcode() {
try {
//URI uri = getURI(url);
//log.debug(uri.getURI());
URI uri = new URI(url,false);
get = new GetMethod();
get.setURI(uri);
setMethodParameters();
client.executeMethod(get);
get = handleRedirects(20);
Assert.assertEquals(HttpStatus.SC_OK, get.getStatusCode());
} catch (HttpException e) {
e.printStackTrace(System.out);
} catch (IOException e) {
e.printStackTrace(System.out);
}
}
private GetMethod handleRedirects(int redirects) throws URIException,
IOException {
switch (get.getStatusCode()) {
case HttpStatus.SC_MOVED_PERMANENTLY:
case HttpStatus.SC_MOVED_TEMPORARILY:
case HttpStatus.SC_SEE_OTHER:
case HttpStatus.SC_TEMPORARY_REDIRECT:
Header[] headers = get.getResponseHeaders("location");
if (headers != null && headers.length > 0) {
String location = customEscape(headers[headers.length - 1]
.getValue());
if (location.indexOf("://") == -1) {
URI uri = new URI(get.getURI(), location, location
.indexOf("%") > -1);
location = uri.getURI();
}
if (log.isDebugEnabled())
log.debug("Handling redirect for " + location + " level "
+ redirects);
get.releaseConnection();
get = new GetMethod();
get.setURI(getURI(location));
setMethodParameters();
client.executeMethod(get);
if (redirects > 0)
handleRedirects(redirects - 1);
}
}
return get;
}
private void setMethodParameters() {
get.setFollowRedirects(false);
HttpMethodParams p = get.getParams();
p.setVersion(new HttpVersion(1, 1));
p.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
}
private URI getURI(String _url) throws URIException {
_url = customEscape(_url);
log.debug("Escaped is "+_url + " : "+(_url.indexOf(" ") > -1 && _url.indexOf("%") > -1));
return new URI(_url, _url.indexOf('%') != -1);
}
private String customEscape(String _url) {
String _internal_url = _url.indexOf(" ") > -1 && _url.indexOf("%") > -1 ? _url
.replaceAll(" ", "%20")
: _url;
return _internal_url;
}
}
==============================================================================
May be somebody could suggest what do I miss here?
--
Eugene N Dzhurinsky
---------------------------------------------------------------------
To unsubscribe, e-mail: httpclient-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: httpclient-user-help@jakarta.apache.org
Re: handling uri with non-latin1 charachters
Posted by Roland Weber <RO...@de.ibm.com>.
Hello Eugene,
URLs are specified to be US-ASCII only.
http://jakarta.apache.org/commons/httpclient/charencodings.html#URLs
Because not everyone cares about specifications, there are some
tweaks in HttpClient. Like this parameter:
http://jakarta.apache.org/commons/httpclient/apidocs/org/apache/commons/httpclient/params/HttpMethodParams.html#HTTP_ELEMENT_CHARSET
cheers,
Roland
---------------------------------------------------------------------
To unsubscribe, e-mail: httpclient-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: httpclient-user-help@jakarta.apache.org