You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2015/01/14 09:02:44 UTC

svn commit: r1651587 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/update/processor/ server/solr/configsets/data_driven_schema_configs/conf/ solrj/src/test/org/apache/solr/client/solrj/

Author: noble
Date: Wed Jan 14 08:02:43 2015
New Revision: 1651587

URL: http://svn.apache.org/r1651587
Log:
SOLR-6937 In schemaless mode ,replace spaces and special characters with underscore

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml
    lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java
    lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Jan 14 08:02:43 2015
@@ -202,7 +202,7 @@ New Features
   (hossman)
 
 * SOLR-6485: ReplicationHandler should have an option to throttle the speed of
-  replication (Varun Thacker, NOble Paul)
+  replication (Varun Thacker, Noble Paul)
 
 * SOLR-6543: Give HttpSolrClient the ability to send PUT requests (Gregory Chanan)
 
@@ -305,6 +305,8 @@ New Features
 
 * SOLR-6581: Efficient DocValues support and numeric collapse field implementations 
   for Collapse and Expand (Joel Bernstein)
+
+* SOLR-6937: In schemaless mode ,replace spaces and special characters with underscore (Noble Paul)
   
 Bug Fixes
 ----------------------

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java?rev=1651587&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/FieldNameMutatingUpdateProcessorFactory.java Wed Jan 14 08:02:43 2015
@@ -0,0 +1,103 @@
+package org.apache.solr.update.processor;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * In the FieldNameMutatingUpdateProcessorFactory configured below,
+ * fields names will be mutated if the name contains space.
+ * Use multiple instances of this processor for multiple replacements
+ * </p>
+ * <pre class="prettyprint">
+ * &lt;processor class="solr.FieldNameMutatingUpdateProcessorFactory"&gt;
+ *   &lt;str name="pattern "&gt;\s&lt;/str&gt;
+ *   &lt;str name="replacement"&gt;_&lt;/str&gt;
+ * &lt;/processor&gt;</pre>
+ */
+
+public class FieldNameMutatingUpdateProcessorFactory  extends UpdateRequestProcessorFactory{
+  public static final Logger log = LoggerFactory.getLogger(FieldNameMutatingUpdateProcessorFactory.class);
+
+  private String sourcePattern, replacement;
+  private Pattern pattern;
+
+
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    return new UpdateRequestProcessor(next) {
+      @Override
+      public void processAdd(AddUpdateCommand cmd) throws IOException {
+        final SolrInputDocument doc = cmd.getSolrInputDocument();
+        final Collection<String> fieldNames
+            = new ArrayList<>(doc.getFieldNames());
+
+        for (final String fname : fieldNames) {
+          Matcher matcher = pattern.matcher(fname);
+          if(matcher.find() ){
+            String newFieldName = matcher.replaceAll(replacement);
+            if(!newFieldName.equals(fname)){
+              SolrInputField old = doc.remove(fname);
+              old.setName(newFieldName);
+              doc.put(newFieldName, old);
+            }
+          }
+        }
+
+        super.processAdd(cmd);
+      }
+
+      @Override
+      public void processDelete(DeleteUpdateCommand cmd) throws IOException {
+        super.processDelete(cmd);
+      }
+    };
+  }
+
+  @Override
+  public void init(NamedList args) {
+    sourcePattern = (String) args.get("pattern");
+    replacement = (String) args.get("replacement");
+    if(sourcePattern ==null || replacement == null) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"'pattern' and 'replacement' are required values");
+    }
+    try {
+      pattern = Pattern.compile(sourcePattern);
+    } catch (Exception e) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"invalid pattern "+ sourcePattern );
+    }
+    super.init(args);
+  }
+}

Modified: lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/server/solr/configsets/data_driven_schema_configs/conf/solrconfig.xml Wed Jan 14 08:02:43 2015
@@ -1378,6 +1378,10 @@
     <processor class="solr.LogUpdateProcessorFactory"/>
     <processor class="solr.DistributedUpdateProcessorFactory"/>
     <processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
+    <processor class="solr.FieldNameMutatingUpdateProcessorFactory">
+      <str name="pattern">[^\w-]</str>
+      <str name="replacement">_</str>
+    </processor>
     <processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
     <processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
     <processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>

Modified: lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java (original)
+++ lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTestsBase.java Wed Jan 14 08:02:43 2015
@@ -258,12 +258,14 @@ abstract public class SolrExampleTestsBa
     assertEquals(10, cnt.get());
   }
   
-  protected void assertNumFound(String query, int num)
+  protected QueryResponse assertNumFound(String query, int num)
       throws SolrServerException, IOException {
     QueryResponse rsp = getSolrClient().query(new SolrQuery(query));
     if (num != rsp.getResults().getNumFound()) {
       fail("expected: " + num + " but had: " + rsp.getResults().getNumFound()
           + " :: " + rsp.getResults());
     }
+    return rsp;
+
   }
 }

Modified: lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java?rev=1651587&r1=1651586&r2=1651587&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java (original)
+++ lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/SolrSchemalessExampleTest.java Wed Jan 14 08:02:43 2015
@@ -25,6 +25,8 @@ import org.apache.http.entity.InputStrea
 import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
 import org.apache.solr.client.solrj.impl.BinaryResponseParser;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.util.ExternalPaths;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -34,7 +36,11 @@ import org.slf4j.LoggerFactory;
 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.OutputStreamWriter;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Properties;
+import java.util.Set;
 
 public class SolrSchemalessExampleTest extends SolrExampleTestsBase {
   private static Logger log = LoggerFactory.getLogger(SolrSchemalessExampleTest.class);
@@ -83,6 +89,44 @@ public class SolrSchemalessExampleTest e
     assertNumFound("*:*", 2);
   }
 
+  @Test
+  public void testFieldMutating() throws Exception {
+    HttpSolrClient client = (HttpSolrClient) getSolrClient();
+    client.deleteByQuery("*:*");
+    client.commit();
+    assertNumFound("*:*", 0); // make sure it got in
+    // two docs, one with uniqueKey, another without it
+    String json = "{\"name one\": \"name\"} " +
+        "{\"name  two\" : \"name\"}" +
+        "{\"first-second\" : \"name\"}" +
+        "{\"x+y\" : \"name\"}" +
+        "{\"p%q\" : \"name\"}" +
+        "{\"a&b\" : \"name\"}"
+        ;
+    HttpClient httpClient = client.getHttpClient();
+    HttpPost post = new HttpPost(client.getBaseURL() + "/update/json/docs");
+    post.setHeader("Content-Type", "application/json");
+    post.setEntity(new InputStreamEntity(new ByteArrayInputStream(json.getBytes("UTF-8")), -1));
+    HttpResponse response = httpClient.execute(post);
+    assertEquals(200, response.getStatusLine().getStatusCode());
+    client.commit();
+    List<String> expected = Arrays.asList(
+        "name_one",
+        "name__two",
+        "first-second",
+        "a_b",
+        "p_q",
+        "x_y");
+    HashSet set = new HashSet();
+    QueryResponse rsp = assertNumFound("*:*", expected.size());
+    for (SolrDocument doc : rsp.getResults()) set.addAll(doc.getFieldNames());
+    for (String s : expected) {
+      assertTrue(s+" not created "+ rsp ,set.contains(s) );
+    }
+
+  }
+
+
 
   @Override
   public SolrClient createNewSolrClient() {