You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2015/01/14 09:02:44 UTC
svn commit: r1651587 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/update/processor/
server/solr/configsets/data_driven_schema_configs/conf/
solrj/src/test/org/apache/solr/client/solrj/
Author: noble
Date: Wed Jan 14 08:02:43 2015
New Revision: 1651587
URL: http://svn.apache.org/r1651587
Log:
SOLR-6937: In schemaless mode, replace spaces and special characters in field names with underscores
Added:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java (with props)
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml
lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java
lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jan 14 08:02:43 2015
@@ -202,7 +202,7 @@ New Features
(hossman)
* SOLR-6485: ReplicationHandler should have an option to throttle the speed of
- replication (Varun Thacker, NOble Paul)
+ replication (Varun Thacker, Noble Paul)
* SOLR-6543: Give HttpSolrClient the ability to send PUT requests (Gregory Chanan)
@@ -305,6 +305,8 @@ New Features
* SOLR-6581: Efficient DocValues support and numeric collapse field implementations
for Collapse and Expand (Joel Bernstein)
+
+* SOLR-6937: In schemaless mode, replace spaces and special characters in field names with underscores (Noble Paul)
Bug Fixes
----------------------
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java?rev=1651587&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java Wed Jan 14 08:02:43 2015
@@ -0,0 +1,103 @@
+package org.apache.solr.update.processor;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * In the FieldNameMutatingUpdateProcessorFactory configured below,
+ * fields names will be mutated if the name contains space.
+ * Use multiple instances of this processor for multiple replacements
+ * </p>
+ * <pre class="prettyprint">
+ * <processor class="solr.FieldNameMutatingUpdateProcessorFactory">
+ * <str name="pattern ">\s</str>
+ * <str name="replacement">_</str>
+ * </processor></pre>
+ */
+
+public class FieldNameMutatingUpdateProcessorFactory extends UpdateRequestProcessorFactory{
+ public static final Logger log = LoggerFactory.getLogger(FieldNameMutatingUpdateProcessorFactory.class);
+
+ private String sourcePattern, replacement;
+ private Pattern pattern;
+
+
+ @Override
+ public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+ return new UpdateRequestProcessor(next) {
+ @Override
+ public void processAdd(AddUpdateCommand cmd) throws IOException {
+ final SolrInputDocument doc = cmd.getSolrInputDocument();
+ final Collection<String> fieldNames
+ = new ArrayList<>(doc.getFieldNames());
+
+ for (final String fname : fieldNames) {
+ Matcher matcher = pattern.matcher(fname);
+ if(matcher.find() ){
+ String newFieldName = matcher.replaceAll(replacement);
+ if(!newFieldName.equals(fname)){
+ SolrInputField old = doc.remove(fname);
+ old.setName(newFieldName);
+ doc.put(newFieldName, old);
+ }
+ }
+ }
+
+ super.processAdd(cmd);
+ }
+
+ @Override
+ public void processDelete(DeleteUpdateCommand cmd) throws IOException {
+ super.processDelete(cmd);
+ }
+ };
+ }
+
+ @Override
+ public void init(NamedList args) {
+ sourcePattern = (String) args.get("pattern");
+ replacement = (String) args.get("replacement");
+ if(sourcePattern ==null || replacement == null) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"'pattern' and 'replacement' are required values");
+ }
+ try {
+ pattern = Pattern.compile(sourcePattern);
+ } catch (Exception e) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"invalid pattern "+ sourcePattern );
+ }
+ super.init(args);
+ }
+}
Modified: lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml Wed Jan 14 08:02:43 2015
@@ -1378,6 +1378,10 @@
<processor class="solr.LogUpdateProcessorFactory"/>
<processor class="solr.DistributedUpdateProcessorFactory"/>
<processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
+ <processor class="solr.FieldNameMutatingUpdateProcessorFactory">
+ <str name="pattern">[^\w-]</str>
+ <str name="replacement">_</str>
+ </processor>
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
<processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
<processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
Modified: lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java (original)
+++ lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java Wed Jan 14 08:02:43 2015
@@ -258,12 +258,14 @@ abstract public class SolrExampleTestsBa
assertEquals(10, cnt.get());
}
- protected void assertNumFound(String query, int num)
+ protected QueryResponse assertNumFound(String query, int num)
throws SolrServerException, IOException {
QueryResponse rsp = getSolrClient().query(new SolrQuery(query));
if (num != rsp.getResults().getNumFound()) {
fail("expected: " + num + " but had: " + rsp.getResults().getNumFound()
+ " :: " + rsp.getResults());
}
+ return rsp;
+
}
}
Modified: lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java (original)
+++ lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java Wed Jan 14 08:02:43 2015
@@ -25,6 +25,8 @@ import org.apache.http.entity.InputStrea
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
import org.apache.solr.util.ExternalPaths;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -34,7 +36,11 @@ import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.OutputStreamWriter;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
import java.util.Properties;
+import java.util.Set;
public class SolrSchemalessExampleTest extends SolrExampleTestsBase {
private static Logger log = LoggerFactory.getLogger(SolrSchemalessExampleTest.class);
@@ -83,6 +89,44 @@ public class SolrSchemalessExampleTest e
assertNumFound("*:*", 2);
}
+  @Test
+  public void testFieldMutating() throws Exception {
+    HttpSolrClient client = (HttpSolrClient) getSolrClient();
+    client.deleteByQuery("*:*");
+    client.commit();
+    assertNumFound("*:*", 0); // make sure the index is empty before posting
+    // six docs, one field each; the two spaces in "name  two" must yield "name__two"
+    String json = "{\"name one\": \"name\"} " +
+        "{\"name  two\" : \"name\"}" +
+        "{\"first-second\" : \"name\"}" +
+        "{\"x+y\" : \"name\"}" +
+        "{\"p%q\" : \"name\"}" +
+        "{\"a&b\" : \"name\"}";
+    HttpClient httpClient = client.getHttpClient();
+    HttpPost post = new HttpPost(client.getBaseURL() + "/update/json/docs");
+    post.setHeader("Content-Type", "application/json");
+    post.setEntity(new InputStreamEntity(new ByteArrayInputStream(json.getBytes("UTF-8")), -1));
+    HttpResponse response = httpClient.execute(post);
+    assertEquals(200, response.getStatusLine().getStatusCode());
+    client.commit();
+    List<String> expected = Arrays.asList(
+        "name_one",
+        "name__two",
+        "first-second",
+        "a_b",
+        "p_q",
+        "x_y");
+    Set<String> set = new HashSet<>();
+    QueryResponse rsp = assertNumFound("*:*", expected.size());
+    for (SolrDocument doc : rsp.getResults()) {
+      set.addAll(doc.getFieldNames());
+    }
+    for (String s : expected) {
+      assertTrue(s + " not created " + rsp, set.contains(s));
+    }
+  }
+
+
@Override
public SolrClient createNewSolrClient() {