You are viewing a plain text version of this content. The canonical link for it is here.
Posted to httpclient-users@hc.apache.org by Eugene Dzhurinsky <bo...@redwerk.com> on 2006/07/20 17:04:04 UTC

Handling URIs with non-Latin-1 characters

Hello

I am facing a strange problem with the following URI:

http://www.vu.lt/site_files/DRS/PRAŠYMAS REZIDENTURAI 2006.doc.

While it works fine when pasted into a browser, HttpClient doesn't handle it
properly:
==============================================================================

import java.io.IOException;

import junit.framework.Assert;
import junit.framework.TestCase;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.HttpVersion;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/**
 * Integration test that requests a URL whose path contains a non-ASCII
 * character and literal spaces, follows redirects by hand, and expects the
 * final response to be HTTP 200.
 *
 * <p>The test talks to the real host named in {@link #url}, so it needs
 * network access.</p>
 */
public class HTTPClientTest extends TestCase {

    private static final Logger log = Logger.getLogger(HTTPClientTest.class);

    /** Upper bound on the number of redirect hops the test will follow. */
    private static final int MAX_REDIRECTS = 20;

    HttpClient client;

    /** The most recently executed request; replaced on every redirect hop. */
    GetMethod get;

    String url;

    /**
     * Creates the client, sets the target URL and enables debug logging.
     */
    protected void setUp() throws Exception {
        client = new HttpClient();
        client.getParams().setBooleanParameter(
                "http.protocol.single-cookie-header", true);
        // The S-with-caron is written as a Unicode escape so the literal does
        // not depend on the encoding the source file is saved or compiled in.
        url = "http://www.vu.lt/site_files/DRS/PRA\u0160YMAS REZIDENTURAI 2006.doc";
        BasicConfigurator.configure();
        log.setLevel(Level.DEBUG);
    }

    /**
     * Releases the connection held by the last executed request, if any.
     */
    protected void tearDown() throws Exception {
        if (get != null) {
            get.releaseConnection();
            get = null;
        }
        super.tearDown();
    }

    /**
     * Fetches {@link #url}, following redirects manually, and asserts that
     * the final status code is 200.
     *
     * @throws IOException if the request fails; propagated so that the test
     *         is reported as an error instead of passing silently
     */
    public void testResultcode() throws IOException {
        // escaped == false: the raw string is handed to HttpClient's URI
        // class, which is asked to do the escaping itself.
        URI uri = new URI(url, false);
        get = new GetMethod();
        get.setURI(uri);
        setMethodParameters();
        client.executeMethod(get);
        get = handleRedirects(MAX_REDIRECTS);
        Assert.assertEquals(HttpStatus.SC_OK, get.getStatusCode());
    }

    /**
     * Follows redirect responses starting from the current {@link #get}.
     *
     * <p>For each hop the last {@code location} header is taken, resolved
     * against the current request URI when it has no scheme, and requested
     * with a fresh {@link GetMethod}. The loop stops when the response is not
     * a redirect, carries no {@code location} header, or the hop budget is
     * used up.</p>
     *
     * @param redirects maximum number of redirect hops to follow
     * @return the method holding the last response received
     * @throws URIException if a redirect target cannot be parsed
     * @throws IOException if executing a request fails
     */
    private GetMethod handleRedirects(int redirects) throws URIException,
            IOException {
        int remaining = redirects;
        while (remaining > 0 && isRedirect(get.getStatusCode())) {
            Header[] headers = get.getResponseHeaders("location");
            if (headers == null || headers.length == 0) {
                break;
            }
            String location = customEscape(headers[headers.length - 1]
                    .getValue());
            if (location.indexOf("://") == -1) {
                // No scheme: treat the value as relative to the current URI.
                URI resolved = new URI(get.getURI(), location, location
                        .indexOf("%") > -1);
                location = resolved.getURI();
            }
            if (log.isDebugEnabled()) {
                log.debug("Handling redirect for " + location + " level "
                        + remaining);
            }
            // Free the connection of the redirecting response before reuse.
            get.releaseConnection();
            get = new GetMethod();
            get.setURI(getURI(location));
            setMethodParameters();
            client.executeMethod(get);
            remaining--;
        }
        return get;
    }

    /**
     * Tells whether the status code is one of the redirect codes this test
     * follows (301, 302, 303, 307).
     */
    private static boolean isRedirect(int status) {
        return status == HttpStatus.SC_MOVED_PERMANENTLY
                || status == HttpStatus.SC_MOVED_TEMPORARILY
                || status == HttpStatus.SC_SEE_OTHER
                || status == HttpStatus.SC_TEMPORARY_REDIRECT;
    }

    /**
     * Applies the per-request settings to the current {@link #get}:
     * automatic redirect following off (redirects are handled by
     * {@link #handleRedirects(int)}), HTTP/1.1, browser-compatible cookies.
     */
    private void setMethodParameters() {
        get.setFollowRedirects(false);
        HttpMethodParams p = get.getParams();
        p.setVersion(new HttpVersion(1, 1));
        p.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    }

    /**
     * Builds a {@link URI} from a redirect target.
     *
     * <p>The string is passed through {@link #customEscape(String)} first; it
     * is then declared as already escaped exactly when it contains a
     * {@code '%'}.</p>
     *
     * @param rawUrl the URL text to convert
     * @return the parsed URI
     * @throws URIException if the text is not a valid URI
     */
    private URI getURI(String rawUrl) throws URIException {
        String escaped = customEscape(rawUrl);
        log.debug("Escaped is " + escaped + " : "
                + (escaped.indexOf(" ") > -1 && escaped.indexOf("%") > -1));
        return new URI(escaped, escaped.indexOf('%') != -1);
    }

    /**
     * Replaces literal spaces with {@code %20}, but only when the URL also
     * contains a {@code '%'}.
     *
     * <p>A URL containing {@code '%'} is later handed to {@link URI} as
     * already escaped, so any remaining raw spaces must be escaped here. A
     * URL without {@code '%'} is returned unchanged.</p>
     *
     * @param rawUrl the URL text to inspect
     * @return the URL with spaces escaped where required
     */
    private String customEscape(String rawUrl) {
        boolean hasSpace = rawUrl.indexOf(" ") > -1;
        boolean hasPercent = rawUrl.indexOf("%") > -1;
        if (hasSpace && hasPercent) {
            return rawUrl.replaceAll(" ", "%20");
        }
        return rawUrl;
    }
}
==============================================================================

Maybe somebody could suggest what I am missing here?

-- 
Eugene N Dzhurinsky

---------------------------------------------------------------------
To unsubscribe, e-mail: httpclient-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: httpclient-user-help@jakarta.apache.org


Re: Handling URIs with non-Latin-1 characters

Posted by Roland Weber <RO...@de.ibm.com>.
Hello Eugene,

URLs are specified to be US-ASCII only. 
http://jakarta.apache.org/commons/httpclient/charencodings.html#URLs

Because not everyone cares about specifications, there are some
tweaks in HttpClient. Like this parameter:
http://jakarta.apache.org/commons/httpclient/apidocs/org/apache/commons/httpclient/params/HttpMethodParams.html#HTTP_ELEMENT_CHARSET

cheers,
  Roland


---------------------------------------------------------------------
To unsubscribe, e-mail: httpclient-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: httpclient-user-help@jakarta.apache.org