You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/01/08 18:25:59 UTC
svn commit: r1430397 - in /lucene/dev/branches/branch_4x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/servlet/
solr/core/src/test/org/apache/solr/servlet/
Author: uschindler
Date: Tue Jan 8 17:25:59 2013
New Revision: 1430397
URL: http://svn.apache.org/viewvc?rev=1430397&view=rev
Log:
Merged revision(s) 1430396 from lucene/dev/trunk:
SOLR-4283: Improvements for URL-Decoding
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1430397&r1=1430396&r2=1430397&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Tue Jan 8 17:25:59 2013
@@ -175,12 +175,14 @@ New Features
that can be set to false to not filter. Its useful when there is already a spatial
filter query but you also need to sort or boost by distance. (David Smiley)
-* SOLR-4265: Solr now parses request parameters (in URL or sent with POST using
- content-type application/x-www-form-urlencoded) in its dispatcher code. It no
+* SOLR-4265, SOLR-4283: Solr now parses request parameters (in URL or sent with POST
+ using content-type application/x-www-form-urlencoded) in its dispatcher code. It no
longer relies on special configuration settings in Tomcat or other web containers
- to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Also
- the maximum length of x-www-form-urlencoded POST parameters can now be configured
- through the requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
+ to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Query
+ strings passed in via the URL need to be properly-%-escaped, UTF-8 encoded
+ bytes, otherwise Solr refuses to handle the request. The maximum length of
+ x-www-form-urlencoded POST parameters can now be configured through the
+ requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
solrconfig.xml (defaults to 2 MiB). Solr now works out of the box with
e.g. Tomcat, JBoss,... (Uwe Schindler, Dawid Weiss, Alex Rocher)
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java?rev=1430397&r1=1430396&r2=1430397&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java Tue Jan 8 17:25:59 2013
@@ -20,9 +20,13 @@ package org.apache.solr.servlet;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
import java.net.URL;
-import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -32,20 +36,20 @@ import java.util.Locale;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.input.BoundedInputStream;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
+import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
+import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
@@ -71,7 +75,7 @@ public class SolrRequestParsers
/** Default instance for e.g. admin requests. Limits to 2 MB uploads and does not allow remote streams. */
public static final SolrRequestParsers DEFAULT = new SolrRequestParsers();
-
+
/**
* Pass in an xml configuration. A null configuration will enable
* everything with maximum values.
@@ -197,37 +201,140 @@ public class SolrRequestParsers
*/
public static MultiMapSolrParams parseQueryString(String queryString) {
Map<String,String[]> map = new HashMap<String, String[]>();
- parseQueryString(queryString, "UTF-8", map);
+ parseQueryString(queryString, map);
return new MultiMapSolrParams(map);
}
/**
- * Given a url-encoded query string, map it into the given map
+ * Given a url-encoded query string (UTF-8), map it into the given map
* @param queryString as given from URL
- * @param charset to be used to decode %-encoding
* @param map place all parameters in this map
*/
- static void parseQueryString(String queryString, String charset, Map<String,String[]> map) {
- if( queryString != null && queryString.length() > 0 ) {
+ static void parseQueryString(final String queryString, final Map<String,String[]> map) {
+ if (queryString != null && queryString.length() > 0) {
try {
- for( String kv : queryString.split( "&" ) ) {
- int idx = kv.indexOf( '=' );
- if( idx >= 0 ) {
- String name = URLDecoder.decode( kv.substring( 0, idx ), charset);
- String value = URLDecoder.decode( kv.substring( idx+1 ), charset);
- MultiMapSolrParams.addParam( name, value, map );
- } else {
- String name = URLDecoder.decode( kv, charset );
- MultiMapSolrParams.addParam( name, "", map );
+ final int len = queryString.length();
+ // this input stream emulates to get the raw bytes from the URL as passed to servlet container, it disallows any byte > 127 and enforces to %-escape them:
+ final InputStream in = new InputStream() {
+ int pos = 0;
+ @Override
+ public int read() {
+ if (pos < len) {
+ final char ch = queryString.charAt(pos);
+ if (ch > 127) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: The query string contains a not-%-escaped byte > 127 at position " + pos);
+ }
+ pos++;
+ return ch;
+ } else {
+ return -1;
+ }
}
- }
+ };
+ parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map);
+ } catch (IOException ioe) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
+ }
+ }
+ }
+
+ /**
+ * Given a url-encoded form from POST content (as InputStream), map it into the given map.
+ * The given InputStream should be buffered!
+ * @param postContent to be parsed
+ * @param charset to be used to decode resulting bytes after %-decoding
+ * @param map place all parameters in this map
+ */
+ @SuppressWarnings("fallthrough")
+ static long parseFormDataContent(final InputStream postContent, final long maxLen, final Charset charset, final Map<String,String[]> map) throws IOException {
+ final CharsetDecoder charsetDecoder = charset.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ long len = 0L, keyPos = 0L, valuePos = 0L;
+ final ByteArrayOutputStream2 keyStream = new ByteArrayOutputStream2(),
+ valueStream = new ByteArrayOutputStream2();
+ ByteArrayOutputStream2 currentStream = keyStream;
+ for(;;) {
+ int b = postContent.read();
+ switch (b) {
+ case -1: // end of stream
+ case '&': // separator
+ if (keyStream.size() > 0) {
+ final String key = decodeChars(keyStream, keyPos, charsetDecoder), value = decodeChars(valueStream, valuePos, charsetDecoder);
+ MultiMapSolrParams.addParam(key, value, map);
+ } else if (valueStream.size() > 0) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
+ }
+ keyStream.reset();
+ valueStream.reset();
+ keyPos = valuePos = len + 1;
+ currentStream = keyStream;
+ break;
+ case '+': // space replacement
+ currentStream.write(' ');
+ break;
+ case '%': // escape
+ final int upper = digit16(b = postContent.read());
+ len++;
+ final int lower = digit16(b = postContent.read());
+ len++;
+ currentStream.write(((upper << 4) + lower));
+ break;
+ case '=': // kv separator
+ if (currentStream == keyStream) {
+ valuePos = len + 1;
+ currentStream = valueStream;
+ break;
+ }
+ // fall-through
+ default:
+ currentStream.write(b);
}
- catch( UnsupportedEncodingException uex ) {
- throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, uex );
+ if (b == -1) {
+ break;
+ }
+ len++;
+ if (len > maxLen) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen/1024L) + " KB");
}
}
+ return len;
}
-
+
+ private static String decodeChars(ByteArrayOutputStream2 stream, long position, CharsetDecoder charsetDecoder) {
+ try {
+ return charsetDecoder.decode(ByteBuffer.wrap(stream.buffer(), 0, stream.size())).toString();
+ } catch (CharacterCodingException cce) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "URLDecoder: Invalid character encoding detected after position " + position +
+ " of query string / form data (while parsing as " + charsetDecoder.charset().name() + ")"
+ );
+ }
+ }
+
+ /** Makes the buffer of ByteArrayOutputStream available without copy. */
+ static final class ByteArrayOutputStream2 extends ByteArrayOutputStream {
+ byte[] buffer() {
+ return buf;
+ }
+ }
+
+ private static int digit16(int b) {
+ if (b == -1) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Incomplete trailing escape (%) pattern");
+ }
+ if (b >= '0' && b <= '9') {
+ return b - '0';
+ }
+ if (b >= 'A' && b <= 'F') {
+ return b - ('A' - 10);
+ }
+ if (b >= 'a' && b <= 'f') {
+ return b - ('a' - 10);
+ }
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Invalid digit (" + ((char) b) + ") in escape (%) pattern");
+ }
+
public boolean isHandleSelect() {
return handleSelect;
}
@@ -404,15 +511,12 @@ class FormDataRequestParser implements S
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Not application/x-www-form-urlencoded content: "+req.getContentType() );
}
- String charset = ContentStreamBase.getCharsetFromContentType(req.getContentType());
- if (charset == null) charset = "UTF-8";
-
final Map<String,String[]> map = new HashMap<String, String[]>();
// also add possible URL parameters and include into the map (parsed using UTF-8):
final String qs = req.getQueryString();
if (qs != null) {
- SolrRequestParsers.parseQueryString(qs, "UTF-8", map);
+ SolrRequestParsers.parseQueryString(qs, map);
}
// may be -1, so we check again later. But if its already greater we can stop processing!
@@ -424,26 +528,21 @@ class FormDataRequestParser implements S
}
// get query String from request body, using the charset given in content-type:
- final InputStream in;
+ final String cs = ContentStreamBase.getCharsetFromContentType(req.getContentType());
+ final Charset charset = (cs == null) ? IOUtils.CHARSET_UTF_8 : Charset.forName(cs);
+ InputStream in = null;
try {
in = req.getInputStream();
- } catch (IllegalStateException ise) {
- throw (SolrException) getParameterIncompatibilityException().initCause(ise);
- }
- try {
- final String data = IOUtils.toString(new BoundedInputStream(in, maxLength), charset);
- // if there is remaining data in the underlying stream, throw exception:
- if (in.read() != -1) {
- // read remaining data and throw away:
- while (IOUtils.skip(in, 1024L) > 0);
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + uploadLimitKB + " KB");
- }
- if (data.length() == 0 && totalLength > 0L) {
+ final long bytesRead = SolrRequestParsers.parseFormDataContent(FastInputStream.wrap(in), maxLength, charset, map);
+ if (bytesRead == 0L && totalLength > 0L) {
throw getParameterIncompatibilityException();
}
- SolrRequestParsers.parseQueryString(data, charset, map);
+ } catch (IOException ioe) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
+ } catch (IllegalStateException ise) {
+ throw (SolrException) getParameterIncompatibilityException().initCause(ise);
} finally {
- IOUtils.closeQuietly(in);
+ IOUtils.closeWhileHandlingException(in);
}
return new MultiMapSolrParams(map);
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java?rev=1430397&r1=1430396&r2=1430397&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java Tue Jan 8 17:25:59 2013
@@ -17,6 +17,8 @@
package org.apache.solr.servlet;
+import java.net.URLEncoder;
+
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass;
@@ -74,7 +76,7 @@ public class DirectSolrConnectionTest ex
// Test using the Stream body parameter
for( String cmd : cmds ) {
- direct.request( "/update?"+CommonParams.STREAM_BODY+"="+cmd, null );
+ direct.request( "/update?"+CommonParams.STREAM_BODY+"="+URLEncoder.encode(cmd, "UTF-8"), null );
}
String got = direct.request( getIt, null );
assertTrue( got.indexOf( value ) > 0 );
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java?rev=1430397&r1=1430396&r2=1430397&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java Tue Jan 8 17:25:59 2013
@@ -25,7 +25,6 @@ import java.io.ByteArrayInputStream;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
-import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -115,7 +114,6 @@ public class SolrRequestParserTest exten
@Test
public void testStreamURL() throws Exception
{
- boolean ok = false;
String url = "http://www.apache.org/dist/lucene/solr/";
byte[] bytes = null;
try {
@@ -152,19 +150,51 @@ public class SolrRequestParserTest exten
}
@Test
- public void testUrlParamParsing()
+ public void testUrlParamParsing() throws Exception
{
- String[][] teststr = new String[][] {
+ final String[][] teststr = new String[][] {
{ "this is simple", "this%20is%20simple" },
{ "this is simple", "this+is+simple" },
{ "\u00FC", "%C3%BC" }, // lower-case "u" with diaeresis/umlaut
{ "\u0026", "%26" }, // &
- { "\u20AC", "%E2%82%AC" } // euro
+ { "", "" }, // empty
+ { "\u20AC", "%E2%82%ac" } // euro, also with lowercase escapes
};
for( String[] tst : teststr ) {
- MultiMapSolrParams params = SolrRequestParsers.parseQueryString( "val="+tst[1] );
+ SolrParams params = SolrRequestParsers.parseQueryString( "val="+tst[1] );
assertEquals( tst[0], params.get( "val" ) );
+ params = SolrRequestParsers.parseQueryString( "val="+tst[1]+"&" );
+ assertEquals( tst[0], params.get( "val" ) );
+ params = SolrRequestParsers.parseQueryString( "&&val="+tst[1]+"&" );
+ assertEquals( tst[0], params.get( "val" ) );
+ params = SolrRequestParsers.parseQueryString( "&&val="+tst[1]+"&&&val="+tst[1]+"&" );
+ assertArrayEquals(new String[]{tst[0],tst[0]}, params.getParams("val") );
+ }
+
+ SolrParams params = SolrRequestParsers.parseQueryString("val");
+ assertEquals("", params.get("val"));
+
+ params = SolrRequestParsers.parseQueryString("val&foo=bar=bar&muh&");
+ assertEquals("", params.get("val"));
+ assertEquals("bar=bar", params.get("foo"));
+ assertEquals("", params.get("muh"));
+
+ final String[] invalid = {
+ "q=h%FCllo", // non-UTF-8
+ "q=h\u00FCllo", // encoded string is not pure US-ASCII
+ "q=hallo%", // incomplete escape
+ "q=hallo%1", // incomplete escape
+ "q=hallo%XX123", // invalid digit 'X' in escape
+ "=hallo" // missing key
+ };
+ for (String s : invalid) {
+ try {
+ SolrRequestParsers.parseQueryString(s);
+ fail("Should throw SolrException");
+ } catch (SolrException se) {
+ // pass
+ }
}
}
@@ -172,7 +202,7 @@ public class SolrRequestParserTest exten
public void testStandardParseParamsAndFillStreams() throws Exception
{
final String getParams = "qt=%C3%BC&dup=foo", postParams = "q=hello&d%75p=bar";
- final byte[] postBytes = postParams.getBytes("UTF-8");
+ final byte[] postBytes = postParams.getBytes("US-ASCII");
// Set up the expected behavior
final String[] ct = new String[] {
@@ -224,7 +254,7 @@ public class SolrRequestParserTest exten
expect(request.getContentLength()).andReturn(-1).anyTimes();
expect(request.getQueryString()).andReturn(null).anyTimes();
expect(request.getInputStream()).andReturn(new ServletInputStream() {
- private final ByteArrayInputStream in = new ByteArrayInputStream(large.toString().getBytes("UTF-8"));
+ private final ByteArrayInputStream in = new ByteArrayInputStream(large.toString().getBytes("US-ASCII"));
@Override public int read() { return in.read(); }
});
replay(request);