You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/11 15:39:47 UTC
svn commit: r1432065 [4/5] - in /lucene/dev/branches/lucene4547: ./ lucene/
lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/test/org/apache/lucene/analysis/core/
lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/...
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java Fri Jan 11 14:39:45 2013
@@ -18,17 +18,20 @@ package org.apache.lucene.codecs.asserti
*/
import java.io.IOException;
+import java.util.Comparator;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
/**
* Just like {@link Lucene40TermVectorsFormat} but with additional asserts.
@@ -43,16 +46,16 @@ public class AssertingTermVectorsFormat
@Override
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
- return in.vectorsWriter(directory, segmentInfo, context);
+ return new AssertingTermVectorsWriter(in.vectorsWriter(directory, segmentInfo, context));
}
-
+
static class AssertingTermVectorsReader extends TermVectorsReader {
private final TermVectorsReader in;
-
+
AssertingTermVectorsReader(TermVectorsReader in) {
this.in = in;
}
-
+
@Override
public void close() throws IOException {
in.close();
@@ -68,5 +71,120 @@ public class AssertingTermVectorsFormat
public TermVectorsReader clone() {
return new AssertingTermVectorsReader(in.clone());
}
- }
+ }
+
+ enum Status {
+ UNDEFINED, STARTED, FINISHED;
+ }
+
+ static class AssertingTermVectorsWriter extends TermVectorsWriter {
+ private final TermVectorsWriter in;
+ private Status docStatus, fieldStatus, termStatus;
+ private int fieldCount, termCount, positionCount;
+ boolean hasPositions;
+
+ AssertingTermVectorsWriter(TermVectorsWriter in) {
+ this.in = in;
+ docStatus = Status.UNDEFINED;
+ fieldStatus = Status.UNDEFINED;
+ termStatus = Status.UNDEFINED;
+ fieldCount = termCount = positionCount = 0;
+ }
+
+ @Override
+ public void startDocument(int numVectorFields) throws IOException {
+ assert fieldCount == 0;
+ assert docStatus != Status.STARTED;
+ in.startDocument(numVectorFields);
+ docStatus = Status.STARTED;
+ fieldCount = numVectorFields;
+ }
+
+ @Override
+ public void finishDocument() throws IOException {
+ assert fieldCount == 0;
+ assert docStatus == Status.STARTED;
+ in.finishDocument();
+ docStatus = Status.FINISHED;
+ }
+
+ @Override
+ public void startField(FieldInfo info, int numTerms, boolean positions,
+ boolean offsets, boolean payloads) throws IOException {
+ assert termCount == 0;
+ assert docStatus == Status.STARTED;
+ assert fieldStatus != Status.STARTED;
+ in.startField(info, numTerms, positions, offsets, payloads);
+ fieldStatus = Status.STARTED;
+ termCount = numTerms;
+ hasPositions = positions || offsets || payloads;
+ }
+
+ @Override
+ public void finishField() throws IOException {
+ assert termCount == 0;
+ assert fieldStatus == Status.STARTED;
+ in.finishField();
+ fieldStatus = Status.FINISHED;
+ --fieldCount;
+ }
+
+ @Override
+ public void startTerm(BytesRef term, int freq) throws IOException {
+ assert docStatus == Status.STARTED;
+ assert fieldStatus == Status.STARTED;
+ assert termStatus != Status.STARTED;
+ in.startTerm(term, freq);
+ termStatus = Status.STARTED;
+ positionCount = hasPositions ? freq : 0;
+ }
+
+ @Override
+ public void finishTerm() throws IOException {
+ assert positionCount == 0;
+ assert docStatus == Status.STARTED;
+ assert fieldStatus == Status.STARTED;
+ assert termStatus == Status.STARTED;
+ in.finishTerm();
+ termStatus = Status.FINISHED;
+ --termCount;
+ }
+
+ @Override
+ public void addPosition(int position, int startOffset, int endOffset,
+ BytesRef payload) throws IOException {
+ assert docStatus == Status.STARTED;
+ assert fieldStatus == Status.STARTED;
+ assert termStatus == Status.STARTED;
+ in.addPosition(position, startOffset, endOffset, payload);
+ --positionCount;
+ }
+
+ @Override
+ public void abort() {
+ in.abort();
+ }
+
+ @Override
+ public void finish(FieldInfos fis, int numDocs) throws IOException {
+ assert docStatus == (numDocs > 0 ? Status.FINISHED : Status.UNDEFINED);
+ assert fieldStatus != Status.STARTED;
+ assert termStatus != Status.STARTED;
+ in.finish(fis, numDocs);
+ }
+
+ @Override
+ public Comparator<BytesRef> getComparator() throws IOException {
+ return in.getComparator();
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ assert docStatus != Status.STARTED;
+ assert fieldStatus != Status.STARTED;
+ assert termStatus != Status.STARTED;
+ }
+
+ }
}
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java Fri Jan 11 14:39:45 2013
@@ -21,6 +21,7 @@ import java.util.Random;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java Fri Jan 11 14:39:45 2013
@@ -287,7 +287,8 @@ public class FSTTester<T> {
allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
outputs,
null,
- willRewrite);
+ willRewrite,
+ true);
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec Fri Jan 11 14:39:45 2013
@@ -17,4 +17,4 @@ org.apache.lucene.codecs.asserting.Asser
org.apache.lucene.codecs.compressing.FastCompressingCodec
org.apache.lucene.codecs.compressing.FastDecompressionCompressingCodec
org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec
-org.apache.lucene.codecs.compressing.DummyCompressingCodec
+org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec
Modified: lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/servlet-api.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/servlet-api.txt?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/servlet-api.txt (original)
+++ lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/servlet-api.txt Fri Jan 11 14:39:45 2013
@@ -24,3 +24,18 @@ javax.servlet.ServletRequest#getParamete
javax.servlet.ServletRequest#getParameterValues(java.lang.String)
javax.servlet.ServletResponse#getWriter()
+javax.servlet.ServletInputStream#readLine(byte[],int,int)
+javax.servlet.ServletOutputStream#print(boolean)
+javax.servlet.ServletOutputStream#print(char)
+javax.servlet.ServletOutputStream#print(double)
+javax.servlet.ServletOutputStream#print(float)
+javax.servlet.ServletOutputStream#print(int)
+javax.servlet.ServletOutputStream#print(long)
+javax.servlet.ServletOutputStream#print(java.lang.String)
+javax.servlet.ServletOutputStream#println(boolean)
+javax.servlet.ServletOutputStream#println(char)
+javax.servlet.ServletOutputStream#println(double)
+javax.servlet.ServletOutputStream#println(float)
+javax.servlet.ServletOutputStream#println(int)
+javax.servlet.ServletOutputStream#println(long)
+javax.servlet.ServletOutputStream#println(java.lang.String)
Modified: lucene/dev/branches/lucene4547/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/CHANGES.txt?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/solr/CHANGES.txt Fri Jan 11 14:39:45 2013
@@ -195,12 +195,14 @@ New Features
that can be set to false to not filter. Its useful when there is already a spatial
filter query but you also need to sort or boost by distance. (David Smiley)
-* SOLR-4265: Solr now parses request parameters (in URL or sent with POST using
- content-type application/x-www-form-urlencoded) in its dispatcher code. It no
+* SOLR-4265, SOLR-4283: Solr now parses request parameters (in URL or sent with POST
+ using content-type application/x-www-form-urlencoded) in its dispatcher code. It no
longer relies on special configuration settings in Tomcat or other web containers
- to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Also
- the maximum length of x-www-form-urlencoded POST parameters can now be configured
- through the requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
+ to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Query
+ strings passed in via the URL need to be properly-%-escaped, UTF-8 encoded
+ bytes, otherwise Solr refuses to handle the request. The maximum length of
+ x-www-form-urlencoded POST parameters can now be configured through the
+ requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
solrconfig.xml (defaults to 2 MiB). Solr now works out of the box with
e.g. Tomcat, JBoss,... (Uwe Schindler, Dawid Weiss, Alex Rocher)
@@ -246,6 +248,8 @@ Optimizations
* SOLR-3840: XML query response display is unreadable in Solr Admin Query UI
(steffkes)
+* SOLR-3982: Admin UI: Various Dataimport Improvements (steffkes)
+
Bug Fixes
----------------------
@@ -493,6 +497,12 @@ Bug Fixes
* SOLR-4170: The 'backup' ReplicationHandler command can sometimes use a stale
index directory rather than the current one. (Mark Miller, Marcin Rzewuck)
+* SOLR-3876: Solr Admin UI is completely dysfunctional on IE 9 (steffkes)
+
+* SOLR-4112: Fixed DataImportHandler ZKAwarePropertiesWriter implementation so
+ import works fine with SolrCloud clusters (Deniz Durmus, James Dyer,
+ Erick Erickson, shalin)
+
Other Changes
----------------------
@@ -567,6 +577,9 @@ Other Changes
* SOLR-4226: Extract fl parsing code out of ReturnFields constructor.
(Ryan Ernst via Robert Muir)
+* SOLR-4208: ExtendedDismaxQParserPlugin has been refactored to make
+ subclassing easier. (Tomás Fernández Löbbe, hossman)
+
================== 4.0.0 ==================
Versions of Major Components
@@ -1684,6 +1697,8 @@ Bug Fixes
* SOLR-1958: When using the MailEntityProcessor, import would fail if
fetchMailsSince was not specified. (Max Lynch via James Dyer)
+* SOLR-4289: Admin UI - JVM memory bar - dark grey "used" width is too small
+ (steffkes, elyograg)
Other Changes
----------------------
Modified: lucene/dev/branches/lucene4547/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/build.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/build.xml (original)
+++ lucene/dev/branches/lucene4547/solr/build.xml Fri Jan 11 14:39:45 2013
@@ -364,7 +364,20 @@
<target name="prepare-release" depends="prepare-release-no-sign, sign-artifacts"/>
<!-- make a distribution -->
- <target name="package" depends="package-src-tgz,create-package"/>
+ <target name="package" depends="package-src-tgz,create-package,-dist-changes,-dist-keys"/>
+
+ <!-- copy changes/ to the release folder -->
+ <target name="-dist-changes">
+ <copy todir="${package.dir}/changes">
+ <fileset dir="build/docs/changes"/>
+ </copy>
+ </target>
+
+ <!-- copy KEYS to the release folder -->
+ <target name="-dist-keys">
+ <get src="http://people.apache.org/keys/group/lucene.asc"
+ dest="${package.dir}/KEYS"/>
+ </target>
<!-- Makes a tarball from running "svn export" at the root level. -->
<!-- Copies NOTICE.txt and LICENSE.txt from solr/ to the root level. -->
Modified: lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java (original)
+++ lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java Fri Jan 11 14:39:45 2013
@@ -84,12 +84,7 @@ public class SimplePropertiesWriter exte
} else {
filename = "dataimport.properties";
}
- if(params.get(DIRECTORY) != null) {
- configDir = params.get(DIRECTORY);
- } else {
- SolrCore core = dataImporter.getCore();
- configDir = (core == null ? "." : core.getResourceLoader().getConfigDir());
- }
+ findDirectory(dataImporter, params);
if(params.get(LOCALE) != null) {
String localeStr = params.get(LOCALE);
for (Locale l : Locale.getAvailableLocales()) {
@@ -109,6 +104,14 @@ public class SimplePropertiesWriter exte
} else {
dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", locale);
}
+ }
+ protected void findDirectory(DataImporter dataImporter, Map<String, String> params) {
+ if(params.get(DIRECTORY) != null) {
+ configDir = params.get(DIRECTORY);
+ } else {
+ SolrCore core = dataImporter.getCore();
+ configDir = (core == null ? "." : core.getResourceLoader().getConfigDir());
+ }
}
private File getPersistFile() {
Modified: lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java (original)
+++ lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java Fri Jan 11 14:39:45 2013
@@ -42,12 +42,16 @@ public class ZKPropertiesWriter extends
@Override
public void init(DataImporter dataImporter, Map<String, String> params) {
- super.init(dataImporter, params);
+ super.init(dataImporter, params);
+ zkClient = dataImporter.getCore().getCoreDescriptor().getCoreContainer()
+ .getZkController().getZkClient();
+ }
+
+ @Override
+ protected void findDirectory(DataImporter dataImporter, Map<String, String> params) {
String collection = dataImporter.getCore().getCoreDescriptor()
.getCloudDescriptor().getCollectionName();
path = "/configs/" + collection + "/" + filename;
- zkClient = dataImporter.getCore().getCoreDescriptor().getCoreContainer()
- .getZkController().getZkClient();
}
@Override
Modified: lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/test-files/dih/solr/collection1/conf/dataimport-schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/test-files/dih/solr/collection1/conf/dataimport-schema.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/test-files/dih/solr/collection1/conf/dataimport-schema.xml (original)
+++ lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/test-files/dih/solr/collection1/conf/dataimport-schema.xml Fri Jan 11 14:39:45 2013
@@ -41,6 +41,8 @@
<field name="COUNTRY_NAME" type="text" indexed="true" stored="true" multiValued="true" />
<field name="SPORT_NAME" type="text" indexed="true" stored="true" multiValued="true" />
<field name="DO_NOT_INDEX" type="ignored" />
+
+ <field name="_version_" type="tlong" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_i" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
Modified: lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java Fri Jan 11 14:39:45 2013
@@ -748,7 +748,8 @@ public final class ZkController {
// now wait until our currently cloud state contains the latest leader
String clusterStateLeader = zkStateReader.getLeaderUrl(collection,
- shardId, timeoutms);
+ shardId, timeoutms * 2); // since we found it in zk, we are willing to
+ // wait a while to find it in state
int tries = 0;
while (!leaderUrl.equals(clusterStateLeader)) {
if (tries == 60) {
Modified: lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (original)
+++ lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Fri Jan 11 14:39:45 2013
@@ -17,39 +17,13 @@
package org.apache.solr.search;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.StopFilterFactory;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
-import org.apache.lucene.queries.function.BoostedQuery;
-import org.apache.lucene.queries.function.FunctionQuery;
-import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
-import org.apache.lucene.queries.function.valuesource.QueryValueSource;
-import org.apache.lucene.search.*;
-import org.apache.solr.analysis.TokenizerChain;
-import org.apache.solr.parser.ParseException;
-import org.apache.solr.parser.QueryParser;
-import org.apache.solr.parser.SolrQueryParserBase.MagicFieldName;
-import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.util.SolrPluginUtils;
/**
* An advanced multi-field query parser based on the DisMax parser.
* See Wiki page http://wiki.apache.org/solr/ExtendedDisMax
- * @lucene.experimental
*/
public class ExtendedDismaxQParserPlugin extends QParserPlugin {
public static final String NAME = "edismax";
@@ -62,1328 +36,4 @@ public class ExtendedDismaxQParserPlugin
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
return new ExtendedDismaxQParser(qstr, localParams, params, req);
}
-}
-
-
-class ExtendedDismaxQParser extends QParser {
-
- /**
- * A field we can't ever find in any schema, so we can safely tell
- * DisjunctionMaxQueryParser to use it as our defaultField, and
- * map aliases from it to any field in our schema.
- */
- private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
-
- /** shorten the class references for utilities */
- private static class U extends SolrPluginUtils {
- /* :NOOP */
- }
-
- /** shorten the class references for utilities */
- private static interface DMP extends DisMaxParams {
- /**
- * User fields. The fields that can be used by the end user to create field-specific queries.
- */
- public static String UF = "uf";
- }
-
-
- public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
- super(qstr, localParams, params, req);
- }
-
- /**
- * The field names specified by 'qf' that (most) clauses will
- * be queried against
- */
- private Map<String,Float> queryFields;
-
- /**
- * The field names specified by 'uf' that users are
- * allowed to include literally in their query string. The Float
- * boost values will be applied automaticly to any clause using that
- * field name. '*' will be treated as an alias for any
- * field that exists in the schema. Wildcards are allowed to
- * express dynamicFields.
- */
- private UserFields userFields;
-
- private Query parsedUserQuery;
-
- private String[] boostParams;
- private String[] multBoosts;
- private List<Query> boostQueries;
- private Query altUserQuery;
- private QParser altQParser;
- private SolrParams solrParams;
-
-
- @Override
- public Query parse() throws SyntaxError {
- SolrParams localParams = getLocalParams();
- SolrParams params = getParams();
-
- solrParams = SolrParams.wrapDefaults(localParams, params);
-
- final String minShouldMatch =
- DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams);
-
- userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF)));
-
- queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), solrParams);
-
- // Phrase slop array
- int pslop[] = new int[4];
- pslop[0] = solrParams.getInt(DisMaxParams.PS, 0);
- pslop[2] = solrParams.getInt(DisMaxParams.PS2, pslop[0]);
- pslop[3] = solrParams.getInt(DisMaxParams.PS3, pslop[0]);
-
-
- // Boosted phrase of the full query string
- List<FieldParams> phraseFields =
- U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF),0,pslop[0]);
- // Boosted Bi-Term Shingles from the query string
- List<FieldParams> phraseFields2 =
- U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF2),2,pslop[2]);
- // Boosted Tri-Term Shingles from the query string
- List<FieldParams> phraseFields3 =
- U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF3),3,pslop[3]);
-
- float tiebreaker = solrParams.getFloat(DisMaxParams.TIE, 0.0f);
-
- int qslop = solrParams.getInt(DisMaxParams.QS, 0);
-
- // remove stopwords from mandatory "matching" component?
- boolean stopwords = solrParams.getBool("stopwords", true);
-
- /* the main query we will execute. we disable the coord because
- * this query is an artificial construct
- */
- BooleanQuery query = new BooleanQuery(true);
-
- /* * * Main User Query * * */
- parsedUserQuery = null;
- String userQuery = getString();
- altUserQuery = null;
- if( userQuery == null || userQuery.trim().length() == 0 ) {
- // If no query is specified, we may have an alternate
- String altQ = solrParams.get( DisMaxParams.ALTQ );
- if (altQ != null) {
- altQParser = subQuery(altQ, null);
- altUserQuery = altQParser.getQuery();
- query.add( altUserQuery , BooleanClause.Occur.MUST );
- } else {
- return null;
- // throw new SyntaxError("missing query string" );
- }
- }
- else {
- // There is a valid query string
- // userQuery = partialEscape(U.stripUnbalancedQuotes(userQuery)).toString();
-
- boolean lowercaseOperators = solrParams.getBool("lowercaseOperators", true);
- String mainUserQuery = userQuery;
-
- ExtendedSolrQueryParser up =
- new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
- up.addAlias(IMPOSSIBLE_FIELD_NAME,
- tiebreaker, queryFields);
- addAliasesFromRequest(up, tiebreaker);
- up.setPhraseSlop(qslop); // slop for explicit user phrase queries
- up.setAllowLeadingWildcard(true);
-
- // defer escaping and only do if lucene parsing fails, or we need phrases
- // parsing fails. Need to sloppy phrase queries anyway though.
- List<Clause> clauses = null;
- int numPluses = 0;
- int numMinuses = 0;
- int numOR = 0;
- int numNOT = 0;
-
- clauses = splitIntoClauses(userQuery, false);
- for (Clause clause : clauses) {
- if (clause.must == '+') numPluses++;
- if (clause.must == '-') numMinuses++;
- if (clause.isBareWord()) {
- String s = clause.val;
- if ("OR".equals(s)) {
- numOR++;
- } else if ("NOT".equals(s)) {
- numNOT++;
- } else if (lowercaseOperators && "or".equals(s)) {
- numOR++;
- }
- }
- }
-
- // Always rebuild mainUserQuery from clauses to catch modifications from splitIntoClauses
- // This was necessary for userFields modifications to get propagated into the query.
- // Convert lower or mixed case operators to uppercase if we saw them.
- // only do this for the lucene query part and not for phrase query boosting
- // since some fields might not be case insensitive.
- // We don't use a regex for this because it might change and AND or OR in
- // a phrase query in a case sensitive field.
- StringBuilder sb = new StringBuilder();
- for (int i=0; i<clauses.size(); i++) {
- Clause clause = clauses.get(i);
- String s = clause.raw;
- // and and or won't be operators at the start or end
- if (lowercaseOperators && i>0 && i+1<clauses.size()) {
- if ("AND".equalsIgnoreCase(s)) {
- s="AND";
- } else if ("OR".equalsIgnoreCase(s)) {
- s="OR";
- }
- }
- sb.append(s);
- sb.append(' ');
- }
-
- mainUserQuery = sb.toString();
-
- // For correct lucene queries, turn off mm processing if there
- // were explicit operators (except for AND).
- boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0;
- // but always for unstructured implicit bqs created by getFieldQuery
- up.minShouldMatch = minShouldMatch;
-
- try {
- up.setRemoveStopFilter(!stopwords);
- up.exceptions = true;
- parsedUserQuery = up.parse(mainUserQuery);
-
- if (stopwords && isEmpty(parsedUserQuery)) {
- // if the query was all stop words, remove none of them
- up.setRemoveStopFilter(true);
- parsedUserQuery = up.parse(mainUserQuery);
- }
- } catch (Exception e) {
- // ignore failure and reparse later after escaping reserved chars
- up.exceptions = false;
- }
-
- if (parsedUserQuery != null && doMinMatched) {
- if (parsedUserQuery instanceof BooleanQuery) {
- SolrPluginUtils.setMinShouldMatch((BooleanQuery)parsedUserQuery, minShouldMatch);
- }
- }
-
- if (parsedUserQuery == null) {
- sb = new StringBuilder();
- for (Clause clause : clauses) {
-
- boolean doQuote = clause.isPhrase;
-
- String s=clause.val;
- if (!clause.isPhrase && ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s))) {
- doQuote=true;
- }
-
- if (clause.must != 0) {
- sb.append(clause.must);
- }
- if (clause.field != null) {
- sb.append(clause.field);
- sb.append(':');
- }
- if (doQuote) {
- sb.append('"');
- }
- sb.append(clause.val);
- if (doQuote) {
- sb.append('"');
- }
- if (clause.field != null) {
- // Add the default user field boost, if any
- Float boost = userFields.getBoost(clause.field);
- if(boost != null)
- sb.append("^").append(boost);
- }
- sb.append(' ');
- }
- String escapedUserQuery = sb.toString();
- parsedUserQuery = up.parse(escapedUserQuery);
-
- if (parsedUserQuery instanceof BooleanQuery) {
- BooleanQuery t = new BooleanQuery();
- SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)parsedUserQuery);
- SolrPluginUtils.setMinShouldMatch(t, minShouldMatch);
- parsedUserQuery = t;
- }
- }
-
- query.add(parsedUserQuery, BooleanClause.Occur.MUST);
-
- // sloppy phrase queries for proximity
- List<FieldParams> allPhraseFields = new ArrayList<FieldParams>();
- allPhraseFields.addAll(phraseFields);
- allPhraseFields.addAll(phraseFields2);
- allPhraseFields.addAll(phraseFields3);
-
- if (allPhraseFields.size() > 0) {
- // find non-field clauses
- List<Clause> normalClauses = new ArrayList<Clause>(clauses.size());
- for (Clause clause : clauses) {
- if (clause.field != null || clause.isPhrase) continue;
- // check for keywords "AND,OR,TO"
- if (clause.isBareWord()) {
- String s = clause.val.toString();
- // avoid putting explict operators in the phrase query
- if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue;
- }
- normalClauses.add(clause);
- }
-
- // full phrase and shingles
- for (FieldParams phraseField: allPhraseFields) {
- Map<String,Float> pf = new HashMap<String,Float>(1);
- pf.put(phraseField.getField(),phraseField.getBoost());
- addShingledPhraseQueries(query, normalClauses, pf,
- phraseField.getWordGrams(),tiebreaker, phraseField.getSlop());
- }
-
- }
- }
-
-
-
- /* * * Boosting Query * * */
- boostParams = solrParams.getParams(DisMaxParams.BQ);
- //List<Query> boostQueries = U.parseQueryStrings(req, boostParams);
- boostQueries=null;
- if (boostParams!=null && boostParams.length>0) {
- boostQueries = new ArrayList<Query>();
- for (String qs : boostParams) {
- if (qs.trim().length()==0) continue;
- Query q = subQuery(qs, null).getQuery();
- boostQueries.add(q);
- }
- }
- if (null != boostQueries) {
- for(Query f : boostQueries) {
- query.add(f, BooleanClause.Occur.SHOULD);
- }
- }
-
- /* * * Boosting Functions * * */
-
- String[] boostFuncs = solrParams.getParams(DisMaxParams.BF);
- if (null != boostFuncs && 0 != boostFuncs.length) {
- for (String boostFunc : boostFuncs) {
- if(null == boostFunc || "".equals(boostFunc)) continue;
- Map<String,Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc);
- for (String f : ff.keySet()) {
- Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery();
- Float b = ff.get(f);
- if (null != b) {
- fq.setBoost(b);
- }
- query.add(fq, BooleanClause.Occur.SHOULD);
- }
- }
- }
-
-
- //
- // create a boosted query (scores multiplied by boosts)
- //
- Query topQuery = query;
- multBoosts = solrParams.getParams("boost");
- if (multBoosts!=null && multBoosts.length>0) {
-
- List<ValueSource> boosts = new ArrayList<ValueSource>();
- for (String boostStr : multBoosts) {
- if (boostStr==null || boostStr.length()==0) continue;
- Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery();
- ValueSource vs;
- if (boost instanceof FunctionQuery) {
- vs = ((FunctionQuery)boost).getValueSource();
- } else {
- vs = new QueryValueSource(boost, 1.0f);
- }
- boosts.add(vs);
- }
-
- if (boosts.size()>1) {
- ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
- topQuery = new BoostedQuery(query, prod);
- } else if (boosts.size() == 1) {
- topQuery = new BoostedQuery(query, boosts.get(0));
- }
- }
-
- return topQuery;
- }
-
- /**
- * Extracts all the aliased fields from the requests and adds them to up
- */
- private void addAliasesFromRequest(ExtendedSolrQueryParser up, float tiebreaker) {
- Iterator<String> it = solrParams.getParameterNamesIterator();
- while(it.hasNext()) {
- String param = it.next();
- if(param.startsWith("f.") && param.endsWith(".qf")) {
- // Add the alias
- String fname = param.substring(2,param.length()-3);
- String qfReplacement = solrParams.get(param);
- Map<String,Float> parsedQf = SolrPluginUtils.parseFieldBoosts(qfReplacement);
- if(parsedQf.size() == 0)
- return;
- up.addAlias(fname, tiebreaker, parsedQf);
- }
- }
- }
-
- /**
- * Modifies the main query by adding a new optional Query consisting
- * of shingled phrase queries across the specified clauses using the
- * specified field => boost mappings.
- *
- * @param mainQuery Where the phrase boosting queries will be added
- * @param clauses Clauses that will be used to construct the phrases
- * @param fields Field => boost mappings for the phrase queries
- * @param shingleSize how big the phrases should be, 0 means a single phrase
- * @param tiebreaker tie breker value for the DisjunctionMaxQueries
- * @param slop slop value for the constructed phrases
- */
- private void addShingledPhraseQueries(final BooleanQuery mainQuery,
- final List<Clause> clauses,
- final Map<String,Float> fields,
- int shingleSize,
- final float tiebreaker,
- final int slop)
- throws SyntaxError {
-
- if (null == fields || fields.isEmpty() ||
- null == clauses || clauses.size() < shingleSize )
- return;
-
- if (0 == shingleSize) shingleSize = clauses.size();
-
- final int goat = shingleSize-1; // :TODO: better name for var?
-
- StringBuilder userPhraseQuery = new StringBuilder();
- for (int i=0; i < clauses.size() - goat; i++) {
- userPhraseQuery.append('"');
- for (int j=0; j <= goat; j++) {
- userPhraseQuery.append(clauses.get(i + j).val);
- userPhraseQuery.append(' ');
- }
- userPhraseQuery.append('"');
- userPhraseQuery.append(' ');
- }
-
- /* for parsing sloppy phrases using DisjunctionMaxQueries */
- ExtendedSolrQueryParser pp =
- new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
-
- pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, fields);
- pp.setPhraseSlop(slop);
- pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
-
- /* :TODO: reevaluate using makeDismax=true vs false...
- *
- * The DismaxQueryParser always used DisjunctionMaxQueries for the
- * pf boost, for the same reasons it used them for the qf fields.
- * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added
- * the "makeDismax=false" property to use BooleanQueries instead, but
- * when asked why his response was "I honestly don't recall" ...
- *
- * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813
- *
- * so for now, we continue to use dismax style queries becuse it
- * seems the most logical and is back compatible, but we should
- * try to figure out what Yonik was thinking at the time (because he
- * rarely does things for no reason)
- */
- pp.makeDismax = true;
-
-
- // minClauseSize is independent of the shingleSize because of stop words
- // (if they are removed from the middle, so be it, but we need at least
- // two or there shouldn't be a boost)
- pp.minClauseSize = 2;
-
- // TODO: perhaps we shouldn't use synonyms either...
-
- Query phrase = pp.parse(userPhraseQuery.toString());
- if (phrase != null) {
- mainQuery.add(phrase, BooleanClause.Occur.SHOULD);
- }
- }
-
-
- @Override
- public String[] getDefaultHighlightFields() {
- String[] highFields = queryFields.keySet().toArray(new String[0]);
- return highFields;
- }
-
- @Override
- public Query getHighlightQuery() throws SyntaxError {
- return parsedUserQuery == null ? altUserQuery : parsedUserQuery;
- }
-
- @Override
- public void addDebugInfo(NamedList<Object> debugInfo) {
- super.addDebugInfo(debugInfo);
- debugInfo.add("altquerystring", altUserQuery);
- if (null != boostQueries) {
- debugInfo.add("boost_queries", boostParams);
- debugInfo.add("parsed_boost_queries",
- QueryParsing.toString(boostQueries, getReq().getSchema()));
- }
- debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF));
- }
-
-
-// FIXME: Not in use
-// public static CharSequence partialEscape(CharSequence s) {
-// StringBuilder sb = new StringBuilder();
-//
-// int len = s.length();
-// for (int i = 0; i < len; i++) {
-// char c = s.charAt(i);
-// if (c == ':') {
-// // look forward to make sure it's something that won't
-// // cause a parse exception (something that won't be escaped... like
-// // +,-,:, whitespace
-// if (i+1<len && i>0) {
-// char ch = s.charAt(i+1);
-// if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
-// // OK, at this point the chars after the ':' will be fine.
-// // now look back and try to determine if this is a fieldname
-// // [+,-]? [letter,_] [letter digit,_,-,.]*
-// // This won't cover *all* possible lucene fieldnames, but we should
-// // only pick nice names to begin with
-// int start, pos;
-// for (start=i-1; start>=0; start--) {
-// ch = s.charAt(start);
-// if (Character.isWhitespace(ch)) break;
-// }
-//
-// // skip whitespace
-// pos = start+1;
-//
-// // skip leading + or -
-// ch = s.charAt(pos);
-// if (ch=='+' || ch=='-') {
-// pos++;
-// }
-//
-// // we don't need to explicitly check for end of string
-// // since ':' will act as our sentinal
-//
-// // first char can't be '-' or '.'
-// ch = s.charAt(pos++);
-// if (Character.isJavaIdentifierPart(ch)) {
-//
-// for(;;) {
-// ch = s.charAt(pos++);
-// if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
-// break;
-// }
-// }
-//
-// if (pos<=i) {
-// // OK, we got to the ':' and everything looked like a valid fieldname, so
-// // don't escape the ':'
-// sb.append(':');
-// continue; // jump back to start of outer-most loop
-// }
-//
-// }
-//
-//
-// }
-// }
-//
-// // we fell through to here, so we should escape this like other reserved chars.
-// sb.append('\\');
-// }
-// else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
-// c == '^' || c == '[' || c == ']' ||
-// c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
-// )
-// {
-// sb.append('\\');
-// }
-// sb.append(c);
-// }
-// return sb;
-// }
-
-
- static class Clause {
-
- boolean isBareWord() {
- return must==0 && !isPhrase;
- }
-
- String field;
- String rawField; // if the clause is +(foo:bar) then rawField=(foo
- boolean isPhrase;
- boolean hasWhitespace;
- boolean hasSpecialSyntax;
- boolean syntaxError;
- char must; // + or -
- String val; // the field value (minus the field name, +/-, quotes)
- String raw; // the raw clause w/o leading/trailing whitespace
- }
-
-
- public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) {
- ArrayList<Clause> lst = new ArrayList<Clause>(4);
- Clause clause;
-
- int pos=0;
- int end=s.length();
- char ch=0;
- int start;
- boolean disallowUserField;
- outer: while (pos < end) {
- clause = new Clause();
- disallowUserField = true;
-
- ch = s.charAt(pos);
-
- while (Character.isWhitespace(ch)) {
- if (++pos >= end) break;
- ch = s.charAt(pos);
- }
-
- start = pos;
-
- if (ch=='+' || ch=='-') {
- clause.must = ch;
- pos++;
- }
-
- clause.field = getFieldName(s, pos, end);
- if(clause.field != null && !userFields.isAllowed(clause.field)) {
- clause.field = null;
- }
- if (clause.field != null) {
- disallowUserField = false;
- int colon = s.indexOf(':',pos);
- clause.rawField = s.substring(pos, colon);
- pos += colon - pos; // skip the field name
- pos++; // skip the ':'
- }
-
- if (pos>=end) break;
-
-
- char inString=0;
-
- ch = s.charAt(pos);
- if (!ignoreQuote && ch=='"') {
- clause.isPhrase = true;
- inString = '"';
- pos++;
- }
-
- StringBuilder sb = new StringBuilder();
- while (pos < end) {
- ch = s.charAt(pos++);
- if (ch=='\\') { // skip escaped chars, but leave escaped
- sb.append(ch);
- if (pos >= end) {
- sb.append(ch); // double backslash if we are at the end of the string
- break;
- }
- ch = s.charAt(pos++);
- sb.append(ch);
- continue;
- } else if (inString != 0 && ch == inString) {
- inString=0;
- break;
- } else if (Character.isWhitespace(ch)) {
- clause.hasWhitespace=true;
- if (inString == 0) {
- // end of the token if we aren't in a string, backing
- // up the position.
- pos--;
- break;
- }
- }
-
- if (inString == 0) {
- switch (ch) {
- case '!':
- case '(':
- case ')':
- case ':':
- case '^':
- case '[':
- case ']':
- case '{':
- case '}':
- case '~':
- case '*':
- case '?':
- case '"':
- case '+':
- case '-':
- case '\\':
- case '|':
- case '&':
- case '/':
- clause.hasSpecialSyntax = true;
- sb.append('\\');
- }
- } else if (ch=='"') {
- // only char we need to escape in a string is double quote
- sb.append('\\');
- }
- sb.append(ch);
- }
- clause.val = sb.toString();
-
- if (clause.isPhrase) {
- if (inString != 0) {
- // detected bad quote balancing... retry
- // parsing with quotes like any other char
- return splitIntoClauses(s, true);
- }
-
- // special syntax in a string isn't special
- clause.hasSpecialSyntax = false;
- } else {
- // an empty clause... must be just a + or - on it's own
- if (clause.val.length() == 0) {
- clause.syntaxError = true;
- if (clause.must != 0) {
- clause.val="\\"+clause.must;
- clause.must = 0;
- clause.hasSpecialSyntax = true;
- } else {
- // uh.. this shouldn't happen.
- clause=null;
- }
- }
- }
-
- if (clause != null) {
- if(disallowUserField) {
- clause.raw = s.substring(start, pos);
- // escape colons, except for "match all" query
- if(!"*:*".equals(clause.raw)) {
- clause.raw = clause.raw.replaceAll(":", "\\\\:");
- }
- } else {
- clause.raw = s.substring(start, pos);
- // Add default userField boost if no explicit boost exists
- if(userFields.isAllowed(clause.field) && !clause.raw.contains("^")) {
- Float boost = userFields.getBoost(clause.field);
- if(boost != null)
- clause.raw += "^" + boost;
- }
- }
- lst.add(clause);
- }
- }
-
- return lst;
- }
-
- /**
- * returns a field name or legal field alias from the current
- * position of the string
- */
- public String getFieldName(String s, int pos, int end) {
- if (pos >= end) return null;
- int p=pos;
- int colon = s.indexOf(':',pos);
- // make sure there is space after the colon, but not whitespace
- if (colon<=pos || colon+1>=end || Character.isWhitespace(s.charAt(colon+1))) return null;
- char ch = s.charAt(p++);
- while ((ch=='(' || ch=='+' || ch=='-') && (pos<end)) {
- ch = s.charAt(p++);
- pos++;
- }
- if (!Character.isJavaIdentifierPart(ch)) return null;
- while (p<colon) {
- ch = s.charAt(p++);
- if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null;
- }
- String fname = s.substring(pos, p);
- boolean isInSchema = getReq().getSchema().getFieldTypeNoEx(fname) != null;
- boolean isAlias = solrParams.get("f."+fname+".qf") != null;
- boolean isMagic = (null != MagicFieldName.get(fname));
-
- return (isInSchema || isAlias || isMagic) ? fname : null;
- }
-
- public static List<String> split(String s, boolean ignoreQuote) {
- ArrayList<String> lst = new ArrayList<String>(4);
- int pos=0, start=0, end=s.length();
- char inString=0;
- char ch=0;
- while (pos < end) {
- char prevChar=ch;
- ch = s.charAt(pos++);
- if (ch=='\\') { // skip escaped chars
- pos++;
- } else if (inString != 0 && ch==inString) {
- inString=0;
- } else if (!ignoreQuote && ch=='"') {
- // If char is directly preceeded by a number or letter
- // then don't treat it as the start of a string.
- if (!Character.isLetterOrDigit(prevChar)) {
- inString=ch;
- }
- } else if (Character.isWhitespace(ch) && inString==0) {
- lst.add(s.substring(start,pos-1));
- start=pos;
- }
- }
- if (start < end) {
- lst.add(s.substring(start,end));
- }
-
- if (inString != 0) {
- // unbalanced quote... ignore them
- return split(s, true);
- }
-
- return lst;
- }
-
-
-
-
- enum QType {
- FIELD,
- PHRASE,
- PREFIX,
- WILDCARD,
- FUZZY,
- RANGE
- }
-
-
- static final RuntimeException unknownField = new RuntimeException("UnknownField");
- static {
- unknownField.fillInStackTrace();
- }
-
- /**
- * A subclass of SolrQueryParser that supports aliasing fields for
- * constructing DisjunctionMaxQueries.
- */
- static class ExtendedSolrQueryParser extends SolrQueryParser {
-
-
- /** A simple container for storing alias info
- */
- protected class Alias {
- public float tie;
- public Map<String,Float> fields;
- }
-
- boolean makeDismax=true;
- boolean disableCoord=true;
- boolean allowWildcard=true;
- int minClauseSize = 0; // minimum number of clauses per phrase query...
- // used when constructing boosting part of query via sloppy phrases
- boolean exceptions; // allow exceptions to be thrown (for example on a missing field)
-
- private Map<String, Analyzer> nonStopFilterAnalyzerPerField;
- private boolean removeStopFilter;
- String minShouldMatch; // for inner boolean queries produced from a single fieldQuery
-
- /**
- * Where we store a map from field name we expect to see in our query
- * string, to Alias object containing the fields to use in our
- * DisjunctionMaxQuery and the tiebreaker to use.
- */
- protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);
-
- public ExtendedSolrQueryParser(QParser parser, String defaultField) {
- super(parser, defaultField);
- // don't trust that our parent class won't ever change it's default
- setDefaultOperator(QueryParser.Operator.OR);
- }
-
- public void setRemoveStopFilter(boolean remove) {
-// analyzer.removeStopFilter = remove;
- removeStopFilter = remove;
- }
-
- @Override
- protected Query getBooleanQuery(List clauses, boolean disableCoord) throws SyntaxError {
- Query q = super.getBooleanQuery(clauses, disableCoord);
- if (q != null) {
- q = QueryUtils.makeQueryable(q);
- }
- return q;
- }
-
-
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
-
- @Override
- protected void addClause(List clauses, int conj, int mods, Query q) {
-//System.out.println("addClause:clauses="+clauses+" conj="+conj+" mods="+mods+" q="+q);
- super.addClause(clauses, conj, mods, q);
- }
-
- /**
- * Add an alias to this query parser.
- *
- * @param field the field name that should trigger alias mapping
- * @param fieldBoosts the mapping from fieldname to boost value that
- * should be used to build up the clauses of the
- * DisjunctionMaxQuery.
- * @param tiebreaker to the tiebreaker to be used in the
- * DisjunctionMaxQuery
- * @see SolrPluginUtils#parseFieldBoosts
- */
- public void addAlias(String field, float tiebreaker,
- Map<String,Float> fieldBoosts) {
-
- Alias a = new Alias();
- a.tie = tiebreaker;
- a.fields = fieldBoosts;
- aliases.put(field, a);
- }
-
- /**
- * Returns the aliases found for a field.
- * Returns null if there are no aliases for the field
- * @return Alias
- */
- public Alias getAlias(String field) {
- return aliases.get(field);
- }
-
-
- QType type;
- String field;
- String val;
- String val2;
- boolean bool;
- boolean bool2;
- float flt;
- int slop;
-
- @Override
- protected Query getFieldQuery(String field, String val, boolean quoted) throws SyntaxError {
-//System.out.println("getFieldQuery: val="+val);
-
- this.type = QType.FIELD;
- this.field = field;
- this.val = val;
- this.slop = getPhraseSlop(); // unspecified
- return getAliasedQuery();
- }
-
- @Override
- protected Query getFieldQuery(String field, String val, int slop) throws SyntaxError {
-//System.out.println("getFieldQuery: val="+val+" slop="+slop);
-
- this.type = QType.PHRASE;
- this.field = field;
- this.val = val;
- this.slop = slop;
- return getAliasedQuery();
- }
-
- @Override
- protected Query getPrefixQuery(String field, String val) throws SyntaxError {
-//System.out.println("getPrefixQuery: val="+val);
- if (val.equals("") && field.equals("*")) {
- return new MatchAllDocsQuery();
- }
- this.type = QType.PREFIX;
- this.field = field;
- this.val = val;
- return getAliasedQuery();
- }
-
- @Override
- protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError {
- Analyzer actualAnalyzer;
- if (removeStopFilter) {
- if (nonStopFilterAnalyzerPerField == null) {
- nonStopFilterAnalyzerPerField = new HashMap<String, Analyzer>();
- }
- actualAnalyzer = nonStopFilterAnalyzerPerField.get(field);
- if (actualAnalyzer == null) {
- actualAnalyzer = noStopwordFilterAnalyzer(field);
- }
- } else {
- actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
- }
- return super.newFieldQuery(actualAnalyzer, field, queryText, quoted);
- }
-
- @Override
- protected Query getRangeQuery(String field, String a, String b, boolean startInclusive, boolean endInclusive) throws SyntaxError {
-//System.out.println("getRangeQuery:");
-
- this.type = QType.RANGE;
- this.field = field;
- this.val = a;
- this.val2 = b;
- this.bool = startInclusive;
- this.bool2 = endInclusive;
- return getAliasedQuery();
- }
-
- @Override
- protected Query getWildcardQuery(String field, String val) throws SyntaxError {
-//System.out.println("getWildcardQuery: val="+val);
-
- if (val.equals("*")) {
- if (field.equals("*")) {
- return new MatchAllDocsQuery();
- } else{
- return getPrefixQuery(field,"");
- }
- }
- this.type = QType.WILDCARD;
- this.field = field;
- this.val = val;
- return getAliasedQuery();
- }
-
- @Override
- protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws SyntaxError {
-//System.out.println("getFuzzyQuery: val="+val);
-
- this.type = QType.FUZZY;
- this.field = field;
- this.val = val;
- this.flt = minSimilarity;
- return getAliasedQuery();
- }
-
- /**
- * Delegates to the super class unless the field has been specified
- * as an alias -- in which case we recurse on each of
- * the aliased fields, and the results are composed into a
- * DisjunctionMaxQuery. (so yes: aliases which point at other
- * aliases should work)
- */
- protected Query getAliasedQuery() throws SyntaxError {
- Alias a = aliases.get(field);
- this.validateCyclicAliasing(field);
- if (a != null) {
- List<Query> lst = getQueries(a);
- if (lst == null || lst.size()==0)
- return getQuery();
- // make a DisjunctionMaxQuery in this case too... it will stop
- // the "mm" processing from making everything required in the case
- // that the query expanded to multiple clauses.
- // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
- // if (lst.size()==1) return lst.get(0);
-
- if (makeDismax) {
- DisjunctionMaxQuery q = new DisjunctionMaxQuery(lst, a.tie);
- return q;
- } else {
- // should we disable coord?
- BooleanQuery q = new BooleanQuery(disableCoord);
- for (Query sub : lst) {
- q.add(sub, BooleanClause.Occur.SHOULD);
- }
- return q;
- }
- } else {
-
- // verify that a fielded query is actually on a field that exists... if not,
- // then throw an exception to get us out of here, and we'll treat it like a
- // literal when we try the escape+re-parse.
- if (exceptions) {
- FieldType ft = schema.getFieldTypeNoEx(field);
- if (ft == null && null == MagicFieldName.get(field)) {
- throw unknownField;
- }
- }
-
- return getQuery();
- }
- }
-
- /**
- * Validate there is no cyclic referencing in the aliasing
- */
- private void validateCyclicAliasing(String field) throws SyntaxError {
- Set<String> set = new HashSet<String>();
- set.add(field);
- if(validateField(field, set)) {
- throw new SyntaxError("Field aliases lead to a cycle");
- }
- }
-
- private boolean validateField(String field, Set<String> set) {
- if(this.getAlias(field) == null) {
- return false;
- }
- boolean hascycle = false;
- for(String referencedField:this.getAlias(field).fields.keySet()) {
- if(!set.add(referencedField)) {
- hascycle = true;
- } else {
- if(validateField(referencedField, set)) {
- hascycle = true;
- }
- set.remove(referencedField);
- }
- }
- return hascycle;
- }
-
- protected List<Query> getQueries(Alias a) throws SyntaxError {
- if (a == null) return null;
- if (a.fields.size()==0) return null;
- List<Query> lst= new ArrayList<Query>(4);
-
- for (String f : a.fields.keySet()) {
- this.field = f;
- Query sub = getAliasedQuery();
- if (sub != null) {
- Float boost = a.fields.get(f);
- if (boost != null) {
- sub.setBoost(boost);
- }
- lst.add(sub);
- }
- }
- return lst;
- }
-
- private Query getQuery() {
- try {
-
- switch (type) {
- case FIELD: // fallthrough
- case PHRASE:
- Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
- // A BooleanQuery is only possible from getFieldQuery if it came from
- // a single whitespace separated term. In this case, check the coordination
- // factor on the query: if its enabled, that means we aren't a set of synonyms
- // but instead multiple terms from one whitespace-separated term, we must
- // apply minShouldMatch here so that it works correctly with other things
- // like aliasing.
- if (query instanceof BooleanQuery) {
- BooleanQuery bq = (BooleanQuery) query;
- if (!bq.isCoordDisabled()) {
- SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch);
- }
- }
- if (query instanceof PhraseQuery) {
- PhraseQuery pq = (PhraseQuery)query;
- if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
- ((PhraseQuery)query).setSlop(slop);
- } else if (query instanceof MultiPhraseQuery) {
- MultiPhraseQuery pq = (MultiPhraseQuery)query;
- if (minClauseSize > 1 && pq.getTermArrays().size() < minClauseSize) return null;
- ((MultiPhraseQuery)query).setSlop(slop);
- } else if (minClauseSize > 1) {
- // if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
- return null;
- }
- return query;
- case PREFIX: return super.getPrefixQuery(field, val);
- case WILDCARD: return super.getWildcardQuery(field, val);
- case FUZZY: return super.getFuzzyQuery(field, val, flt);
- case RANGE: return super.getRangeQuery(field, val, val2, bool, bool2);
- }
- return null;
-
- } catch (Exception e) {
- // an exception here is due to the field query not being compatible with the input text
- // for example, passing a string to a numeric field.
- return null;
- }
- }
-
- private Analyzer noStopwordFilterAnalyzer(String fieldName) {
- FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
- Analyzer qa = ft.getQueryAnalyzer();
- if (!(qa instanceof TokenizerChain)) {
- return qa;
- }
-
- TokenizerChain tcq = (TokenizerChain) qa;
- Analyzer ia = ft.getAnalyzer();
- if (ia == qa || !(ia instanceof TokenizerChain)) {
- return qa;
- }
- TokenizerChain tci = (TokenizerChain) ia;
-
- // make sure that there isn't a stop filter in the indexer
- for (TokenFilterFactory tf : tci.getTokenFilterFactories()) {
- if (tf instanceof StopFilterFactory) {
- return qa;
- }
- }
-
- // now if there is a stop filter in the query analyzer, remove it
- int stopIdx = -1;
- TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
-
- for (int i = 0; i < facs.length; i++) {
- TokenFilterFactory tf = facs[i];
- if (tf instanceof StopFilterFactory) {
- stopIdx = i;
- break;
- }
- }
-
- if (stopIdx == -1) {
- // no stop filter exists
- return qa;
- }
-
- TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length - 1];
- for (int i = 0, j = 0; i < facs.length; i++) {
- if (i == stopIdx) continue;
- newtf[j++] = facs[i];
- }
-
- TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf);
- newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));
- return newa;
- }
- }
-
- static boolean isEmpty(Query q) {
- if (q==null) return true;
- if (q instanceof BooleanQuery && ((BooleanQuery)q).clauses().size()==0) return true;
- return false;
- }
-
- /**
- * Class that encapsulates the input from userFields parameter and can answer whether
- * a field allowed or disallowed as fielded query in the query string
- */
- static class UserFields {
- private Map<String,Float> userFieldsMap;
- private DynamicField[] dynamicUserFields;
- private DynamicField[] negativeDynamicUserFields;
-
- UserFields(Map<String,Float> ufm) {
- userFieldsMap = ufm;
- if (0 == userFieldsMap.size()) {
- userFieldsMap.put("*", null);
- }
-
- // Process dynamic patterns in userFields
- ArrayList<DynamicField> dynUserFields = new ArrayList<DynamicField>();
- ArrayList<DynamicField> negDynUserFields = new ArrayList<DynamicField>();
- for(String f : userFieldsMap.keySet()) {
- if(f.contains("*")) {
- if(f.startsWith("-"))
- negDynUserFields.add(new DynamicField(f.substring(1)));
- else
- dynUserFields.add(new DynamicField(f));
- }
- }
- Collections.sort(dynUserFields);
- dynamicUserFields = dynUserFields.toArray(new DynamicField[dynUserFields.size()]);
- Collections.sort(negDynUserFields);
- negativeDynamicUserFields = negDynUserFields.toArray(new DynamicField[negDynUserFields.size()]);
-// System.out.println("** userF="+userFieldsMap+", dynUF="+Arrays.toString(dynamicUserFields)+", negDynUF="+Arrays.toString(negativeDynamicUserFields));
- }
-
- /**
- * Is the given field name allowed according to UserFields spec given in the uf parameter?
- * @param fname the field name to examine
- * @return true if the fielded queries are allowed on this field
- */
- public boolean isAllowed(String fname) {
- boolean res = ((userFieldsMap.containsKey(fname) || isDynField(fname, false)) &&
- !userFieldsMap.containsKey("-"+fname) &&
- !isDynField(fname, true));
- return res;
- }
-
- private boolean isDynField(String field, boolean neg) {
- return getDynFieldForName(field, neg) == null ? false : true;
- }
-
- private String getDynFieldForName(String f, boolean neg) {
- for( DynamicField df : neg?negativeDynamicUserFields:dynamicUserFields ) {
- if( df.matches( f ) ) return df.wildcard;
- }
- return null;
- }
-
- /**
- * Finds the default user field boost associated with the given field.
- * This is parsed from the uf parameter, and may be specified as wildcards, e.g. *name^2.0 or *^3.0
- * @param field the field to find boost for
- * @return the float boost value associated with the given field or a wildcard matching the field
- */
- public Float getBoost(String field) {
- return (userFieldsMap.containsKey(field)) ?
- userFieldsMap.get(field) : // Exact field
- userFieldsMap.get(getDynFieldForName(field, false)); // Dynamic field
- }
- }
-
- /* Represents a dynamic field, for easier matching, inspired by same class in IndexSchema */
- static class DynamicField implements Comparable<DynamicField> {
- final static int STARTS_WITH=1;
- final static int ENDS_WITH=2;
- final static int CATCHALL=3;
-
- final String wildcard;
- final int type;
-
- final String str;
-
- protected DynamicField(String wildcard) {
- this.wildcard = wildcard;
- if (wildcard.equals("*")) {
- type=CATCHALL;
- str=null;
- }
- else if (wildcard.startsWith("*")) {
- type=ENDS_WITH;
- str=wildcard.substring(1);
- }
- else if (wildcard.endsWith("*")) {
- type=STARTS_WITH;
- str=wildcard.substring(0,wildcard.length()-1);
- }
- else {
- throw new RuntimeException("dynamic field name must start or end with *");
- }
- }
-
- /*
- * Returns true if the regex wildcard for this DynamicField would match the input field name
- */
- public boolean matches(String name) {
- if (type==CATCHALL) return true;
- else if (type==STARTS_WITH && name.startsWith(str)) return true;
- else if (type==ENDS_WITH && name.endsWith(str)) return true;
- else return false;
- }
-
- /**
- * Sort order is based on length of regex. Longest comes first.
- * @param other The object to compare to.
- * @return a negative integer, zero, or a positive integer
- * as this object is less than, equal to, or greater than
- * the specified object.
- */
- @Override
- public int compareTo(DynamicField other) {
- return other.wildcard.length() - wildcard.length();
- }
-
- @Override
- public String toString() {
- return this.wildcard;
- }
- }
-}
+}
\ No newline at end of file
Modified: lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java (original)
+++ lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java Fri Jan 11 14:39:45 2013
@@ -20,9 +20,13 @@ package org.apache.solr.servlet;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
import java.net.URL;
-import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -32,20 +36,20 @@ import java.util.Locale;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.input.BoundedInputStream;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
+import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
+import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
@@ -71,7 +75,7 @@ public class SolrRequestParsers
/** Default instance for e.g. admin requests. Limits to 2 MB uploads and does not allow remote streams. */
public static final SolrRequestParsers DEFAULT = new SolrRequestParsers();
-
+
/**
* Pass in an xml configuration. A null configuration will enable
* everything with maximum values.
@@ -197,37 +201,140 @@ public class SolrRequestParsers
*/
public static MultiMapSolrParams parseQueryString(String queryString) {
Map<String,String[]> map = new HashMap<String, String[]>();
- parseQueryString(queryString, "UTF-8", map);
+ parseQueryString(queryString, map);
return new MultiMapSolrParams(map);
}
/**
- * Given a url-encoded query string, map it into the given map
+ * Given a url-encoded query string (UTF-8), map it into the given map
* @param queryString as given from URL
- * @param charset to be used to decode %-encoding
* @param map place all parameters in this map
*/
- static void parseQueryString(String queryString, String charset, Map<String,String[]> map) {
- if( queryString != null && queryString.length() > 0 ) {
+ static void parseQueryString(final String queryString, final Map<String,String[]> map) {
+ if (queryString != null && queryString.length() > 0) {
try {
- for( String kv : queryString.split( "&" ) ) {
- int idx = kv.indexOf( '=' );
- if( idx >= 0 ) {
- String name = URLDecoder.decode( kv.substring( 0, idx ), charset);
- String value = URLDecoder.decode( kv.substring( idx+1 ), charset);
- MultiMapSolrParams.addParam( name, value, map );
- } else {
- String name = URLDecoder.decode( kv, charset );
- MultiMapSolrParams.addParam( name, "", map );
+ final int len = queryString.length();
+ // this input stream emulates getting the raw bytes from the URL as passed to the servlet container; it disallows any byte > 127 and requires such bytes to be %-escaped:
+ final InputStream in = new InputStream() {
+ int pos = 0;
+ @Override
+ public int read() {
+ if (pos < len) {
+ final char ch = queryString.charAt(pos);
+ if (ch > 127) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: The query string contains a not-%-escaped byte > 127 at position " + pos);
+ }
+ pos++;
+ return ch;
+ } else {
+ return -1;
+ }
}
- }
+ };
+ parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map);
+ } catch (IOException ioe) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
+ }
+ }
+ }
+
+ /**
+ * Given url-encoded form data from POST content (as an InputStream), map it into the given map.
+ * The given InputStream should be buffered!
+ * @param postContent to be parsed
+ * @param charset to be used to decode resulting bytes after %-decoding
+ * @param map place all parameters in this map
+ */
+ @SuppressWarnings("fallthrough")
+ static long parseFormDataContent(final InputStream postContent, final long maxLen, final Charset charset, final Map<String,String[]> map) throws IOException {
+ final CharsetDecoder charsetDecoder = charset.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ long len = 0L, keyPos = 0L, valuePos = 0L;
+ final ByteArrayOutputStream2 keyStream = new ByteArrayOutputStream2(),
+ valueStream = new ByteArrayOutputStream2();
+ ByteArrayOutputStream2 currentStream = keyStream;
+ for(;;) {
+ int b = postContent.read();
+ switch (b) {
+ case -1: // end of stream
+ case '&': // separator
+ if (keyStream.size() > 0) {
+ final String key = decodeChars(keyStream, keyPos, charsetDecoder), value = decodeChars(valueStream, valuePos, charsetDecoder);
+ MultiMapSolrParams.addParam(key, value, map);
+ } else if (valueStream.size() > 0) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
+ }
+ keyStream.reset();
+ valueStream.reset();
+ keyPos = valuePos = len + 1;
+ currentStream = keyStream;
+ break;
+ case '+': // space replacement
+ currentStream.write(' ');
+ break;
+ case '%': // escape
+ final int upper = digit16(b = postContent.read());
+ len++;
+ final int lower = digit16(b = postContent.read());
+ len++;
+ currentStream.write(((upper << 4) + lower));
+ break;
+ case '=': // kv separator
+ if (currentStream == keyStream) {
+ valuePos = len + 1;
+ currentStream = valueStream;
+ break;
+ }
+ // fall-through
+ default:
+ currentStream.write(b);
}
- catch( UnsupportedEncodingException uex ) {
- throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, uex );
+ if (b == -1) {
+ break;
+ }
+ len++;
+ if (len > maxLen) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen/1024L) + " KB");
}
}
+ return len;
}
-
+
+ private static String decodeChars(ByteArrayOutputStream2 stream, long position, CharsetDecoder charsetDecoder) {
+ try {
+ return charsetDecoder.decode(ByteBuffer.wrap(stream.buffer(), 0, stream.size())).toString();
+ } catch (CharacterCodingException cce) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "URLDecoder: Invalid character encoding detected after position " + position +
+ " of query string / form data (while parsing as " + charsetDecoder.charset().name() + ")"
+ );
+ }
+ }
+
+ /** Makes the internal buffer of ByteArrayOutputStream available without copying. */
+ static final class ByteArrayOutputStream2 extends ByteArrayOutputStream {
+ byte[] buffer() {
+ return buf;
+ }
+ }
+
+ private static int digit16(int b) {
+ if (b == -1) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Incomplete trailing escape (%) pattern");
+ }
+ if (b >= '0' && b <= '9') {
+ return b - '0';
+ }
+ if (b >= 'A' && b <= 'F') {
+ return b - ('A' - 10);
+ }
+ if (b >= 'a' && b <= 'f') {
+ return b - ('a' - 10);
+ }
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Invalid digit (" + ((char) b) + ") in escape (%) pattern");
+ }
+
public boolean isHandleSelect() {
return handleSelect;
}
@@ -404,15 +511,12 @@ class FormDataRequestParser implements S
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Not application/x-www-form-urlencoded content: "+req.getContentType() );
}
- String charset = ContentStreamBase.getCharsetFromContentType(req.getContentType());
- if (charset == null) charset = "UTF-8";
-
final Map<String,String[]> map = new HashMap<String, String[]>();
// also add possible URL parameters and include into the map (parsed using UTF-8):
final String qs = req.getQueryString();
if (qs != null) {
- SolrRequestParsers.parseQueryString(qs, "UTF-8", map);
+ SolrRequestParsers.parseQueryString(qs, map);
}
// may be -1, so we check again later. But if its already greater we can stop processing!
@@ -424,26 +528,21 @@ class FormDataRequestParser implements S
}
// get query String from request body, using the charset given in content-type:
- final InputStream in;
+ final String cs = ContentStreamBase.getCharsetFromContentType(req.getContentType());
+ final Charset charset = (cs == null) ? IOUtils.CHARSET_UTF_8 : Charset.forName(cs);
+ InputStream in = null;
try {
in = req.getInputStream();
- } catch (IllegalStateException ise) {
- throw (SolrException) getParameterIncompatibilityException().initCause(ise);
- }
- try {
- final String data = IOUtils.toString(new BoundedInputStream(in, maxLength), charset);
- // if there is remaining data in the underlying stream, throw exception:
- if (in.read() != -1) {
- // read remaining data and throw away:
- while (IOUtils.skip(in, 1024L) > 0);
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + uploadLimitKB + " KB");
- }
- if (data.length() == 0 && totalLength > 0L) {
+ final long bytesRead = SolrRequestParsers.parseFormDataContent(FastInputStream.wrap(in), maxLength, charset, map);
+ if (bytesRead == 0L && totalLength > 0L) {
throw getParameterIncompatibilityException();
}
- SolrRequestParsers.parseQueryString(data, charset, map);
+ } catch (IOException ioe) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
+ } catch (IllegalStateException ise) {
+ throw (SolrException) getParameterIncompatibilityException().initCause(ise);
} finally {
- IOUtils.closeQuietly(in);
+ IOUtils.closeWhileHandlingException(in);
}
return new MultiMapSolrParams(map);
Modified: lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java (original)
+++ lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java Fri Jan 11 14:39:45 2013
@@ -242,7 +242,7 @@ public abstract class FieldMutatingUpdat
/**
* Removes all instance of the key from NamedList, returning the Set of
- * Strings that key refered to. Throws an error if the key didn't refer
+ * Strings that key referred to. Throws an error if the key didn't refer
* to one or more strings (or arrays of strings)
* @exception SolrException invalid arr/str structure.
*/
Modified: lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java (original)
+++ lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java Fri Jan 11 14:39:45 2013
@@ -622,7 +622,7 @@ public class SolrPluginUtils {
private final static Pattern CONSECUTIVE_OP_PATTERN = Pattern.compile( "\\s+[+-](?:\\s*[+-]+)+" );
/**
- * Strips operators that are used illegally, otherwise reuturns it's
+ * Strips operators that are used illegally, otherwise returns its
* input. Some examples of illegal user queries are: "chocolate +-
* chip", "chocolate - - chip", and "chocolate chip -".
*/
Modified: lucene/dev/branches/lucene4547/solr/core/src/test-files/solr/collection1/conf/schema12.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/core/src/test-files/solr/collection1/conf/schema12.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/core/src/test-files/solr/collection1/conf/schema12.xml (original)
+++ lucene/dev/branches/lucene4547/solr/core/src/test-files/solr/collection1/conf/schema12.xml Fri Jan 11 14:39:45 2013
@@ -597,6 +597,7 @@
<!-- unused, for testing luke copyFields -->
<dynamicField name="foo_copysource_*" type="ignored" multiValued="true"/>
<dynamicField name="bar_copydest_*" type="ignored" multiValued="true"/>
+ <dynamicField name="*_es" type="text" indexed="true" stored="true"/>
</fields>