You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mu...@apache.org on 2021/01/07 16:32:03 UTC

[lucene-solr] branch master updated (4ab5d31 -> 6ff4a9b)

This is an automated email from the ASF dual-hosted git repository.

munendrasn pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from 4ab5d31  SOLR-15040: Update CHANGES.txt
     new 0846da5  SOLR-14950: fix regenerating of copyfield with explicit src/dest matching dyn rule
     new d7fd3d8  SOLR-12539: handle extra spaces in JSON facet shorthand syntax
     new 6ff4a9b  SOLR-14514: add extra checks for picking 'stream' method in JSON facet

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 solr/CHANGES.txt                                   |  12 ++-
 .../org/apache/solr/schema/ManagedIndexSchema.java |   5 +-
 .../org/apache/solr/search/facet/FacetField.java   |   5 +-
 .../org/apache/solr/search/facet/FacetParser.java  |   7 +-
 .../apache/solr/rest/schema/TestBulkSchemaAPI.java | 102 +++++++++++++++++++++
 .../solr/search/facet/RangeFacetCloudTest.java     |   6 --
 .../solr/search/facet/TestCloudJSONFacetSKG.java   |  29 +++---
 .../search/facet/TestCloudJSONFacetSKGEquiv.java   |  23 ++---
 .../apache/solr/search/facet/TestJsonFacets.java   |  30 +++---
 solr/solr-ref-guide/src/json-facet-api.adoc        |   2 +-
 10 files changed, 161 insertions(+), 60 deletions(-)


[lucene-solr] 03/03: SOLR-14514: add extra checks for picking 'stream' method in JSON facet

Posted by mu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

munendrasn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 6ff4a9b395a68d9b0d9e259537e3f5daf0278d51
Author: Munendra S N <mu...@apache.org>
AuthorDate: Thu Nov 26 12:01:51 2020 +0530

    SOLR-14514: add extra checks for picking 'stream' method in JSON facet
    
    missing, allBuckets, and numBuckets are not supported with the stream method.
    So, avoid picking the stream method when any one of them is enabled, even if
    the facet sort is 'index asc'.
---
 solr/CHANGES.txt                                   |  3 +++
 .../org/apache/solr/search/facet/FacetField.java   |  5 +++-
 .../solr/search/facet/TestCloudJSONFacetSKG.java   | 29 ++++++++--------------
 .../search/facet/TestCloudJSONFacetSKGEquiv.java   | 23 ++++++-----------
 .../apache/solr/search/facet/TestJsonFacets.java   | 28 +++++++++++----------
 solr/solr-ref-guide/src/json-facet-api.adoc        |  2 +-
 6 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b6e74bb..a8555a2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -310,6 +310,9 @@ Bug Fixes
 * SOLR-12539: Handle parsing of values for 'other', 'include', and 'excludeTags' in JSON facets when specified as
   comma-separated values with extra spaces (hossman, Munendra S N)
 
+* SOLR-14514: Avoid picking 'stream' method in JSON facet when any of 'allBuckets', 'numBuckets', and 'missing' parameters are enabled
+  (hossman, Munendra S N)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index 728cd6e..fa1c085 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -105,7 +105,10 @@ public class FacetField extends FacetRequestSorted {
       method = FacetMethod.STREAM;
     }
     if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField() &&
-        // wether we can use stream processing depends on wether this is a shard request, wether
+        // streaming doesn't support allBuckets, numBuckets or missing
+        // so, don't use stream processor if anyone of them is enabled
+        !(allBuckets || numBuckets || missing) &&
+        // whether we can use stream processing depends on whether this is a shard request, whether
         // re-sorting has been requested, and if the effective sort during collection is "index asc"
         ( fcontext.isShard()
           // for a shard request, the effective per-shard sort must be index asc
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
index 0992af8..f2b67d5 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
@@ -45,18 +45,17 @@ import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
-import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
-
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
-
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
+import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
+
 /** 
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> function, that asserts the 
@@ -65,13 +64,13 @@ import org.slf4j.LoggerFactory;
  * <p>
  * Note that unlike normal facet "count" verification, using a high limit + overrequest isn't a substitute 
  * for refinement in order to ensure accurate "skg" computation across shards.  For that reason, this 
- * tests forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifices a 
- * <code>domain: { 'query':'*:*' }</code> for every facet, in order to garuntee that all shards 
+ * tests forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifies a
+ * <code>domain: { 'query':'*:*' }</code> for every facet, in order to guarantee that all shards
  * participate in all facets, so that the popularity &amp; relatedness values returned can be proven 
  * with validation requests.
  * </p>
  * <p>
- * (Refinement alone is not enough. Using the '*:*' query as the facet domain is neccessary to 
+ * (Refinement alone is not enough. Using the '*:*' query as the facet domain is necessary to
  * prevent situations where a single shardX may return candidate bucket with no child-buckets due to 
  * the normal facet intersections, but when refined on other shardY(s), can produce "high scoring" 
  * SKG child-buckets, which would then be missing the foreground/background "size" contributions from 
@@ -156,7 +155,7 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
       SolrInputDocument doc = sdoc("id", ""+id);
       for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
         // NOTE: we ensure every doc has at least one value in each field
-        // that way, if a term is returned for a parent there there is garunteed to be at least one
+        // that way, if a term is returned for a parent there there is guaranteed to be at least one
         // one term in the child facet as well.
         //
         // otherwise, we'd face the risk of a single shardX returning parentTermX as a top term for
@@ -226,7 +225,7 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
 
   /**
    * Given a (random) field number, returns a random (integer based) value for that field.
-   * NOTE: The number of unique values in each field is constant acording to {@link #UNIQUE_FIELD_VALS}
+   * NOTE: The number of unique values in each field is constant according to {@link #UNIQUE_FIELD_VALS}
    * but the precise <em>range</em> of values will vary for each unique field number, such that cross field joins 
    * will match fewer documents based on how far apart the field numbers are.
    *
@@ -279,7 +278,7 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
       
       // NOTE that these two queries & facets *should* effectively identical given that the
       // very large limit value is big enough no shard will ever return that may terms,
-      // but the "limit=-1" case it actaully triggers slightly different code paths
+      // but the "limit=-1" case it actually triggers slightly different code paths
       // because it causes FacetField.returnsPartial() to be "true"
       for (int limit : new int[] { 999999999, -1 }) {
         Map<String,TermFacet> facets = new LinkedHashMap<>();
@@ -769,14 +768,8 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
      *
      *
      * @return a Boolean, may be null
-     * @see <a href="https://issues.apache.org/jira/browse/SOLR-14514">SOLR-14514: allBuckets ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String sort) {
-
-      if ("index asc".equals(sort)) {
-        return null;
-      }
-      
       switch(r.nextInt(4)) {
         case 0: return true;
         case 1: return false;
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
index eb68662..8e09593 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
@@ -20,8 +20,8 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.LinkedHashMap;
@@ -47,22 +47,21 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import static org.apache.solr.search.facet.FacetField.FacetMethod;
-import static org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc.SWEEP_COLLECTION_DEBUG_KEY;
-
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
-  
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.search.facet.FacetField.FacetMethod;
+import static org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc.SWEEP_COLLECTION_DEBUG_KEY;
+
 /** 
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> function, that asserts the 
- * results are consistent and equivilent regardless of what <code>method</code> (ie: FacetFieldProcessor) 
+ * results are consistent and equivalent regardless of what <code>method</code> (ie: FacetFieldProcessor)
  * and/or <code>{@value RelatednessAgg#SWEEP_COLLECTION}</code> option is requested.
  * </p>
  * <p>
@@ -71,7 +70,7 @@ import org.slf4j.LoggerFactory;
  * because this test does not attempt to prove the results with validation requests.
  * </p>
  * <p>
- * This test only concerns itself with the equivilency of results
+ * This test only concerns itself with the equivalency of results
  * </p>
  * 
  * @see TestCloudJSONFacetSKG
@@ -986,14 +985,8 @@ public class TestCloudJSONFacetSKGEquiv extends SolrCloudTestCase {
      * </p>
      *
      * @return a Boolean, may be null
-     * @see <a href="https://issues.apache.org/jira/browse/SOLR-14514">SOLR-14514: allBuckets ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String sort) {
-
-      if ("index asc".equals(sort)) {
-        return null;
-      }
-      
       switch(r.nextInt(4)) {
         case 0: return true;
         case 1: return false;
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index c5a7a1d..166968b 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -488,21 +488,17 @@ public class TestJsonFacets extends SolrTestCaseHS {
     }
 
     // relatedness shouldn't be computed for allBuckets, but it also shouldn't cause any problems
-    //
-    // NOTE: we can't test this with 'index asc' because STREAM processor
-    // (which test may randomize as default) doesn't support allBuckets
-    // see: https://issues.apache.org/jira/browse/SOLR-14514
-    //
     for (String sort : Arrays.asList("sort:'y desc'",
                                      "sort:'z desc'",
                                      "sort:'skg desc'",
+                                     "sort:'index asc'",
                                      "prelim_sort:'count desc', sort:'skg desc'")) {
-      // the relatedness score of each of our cat_s values is (conviniently) also alphabetical order,
+      // the relatedness score of each of our cat_s values is (conveniently) also alphabetical order,
       // (and the same order as 'sum(num_i) desc' & 'min(num_i) desc')
       //
       // So all of these re/sort options should produce identical output (since the num buckets is < limit)
       // - Testing "index" sort allows the randomized use of "stream" processor as default to be tested.
-      // - Testing (re)sorts on other stats sanity checks code paths where relatedness() is a "defered" Agg
+      // - Testing (re)sorts on other stats sanity checks code paths where relatedness() is a "deferred" Agg
       for (String limit : Arrays.asList(", ", ", limit:5, ", ", limit:-1, ")) {
         // results shouldn't change regardless of our limit param"
         assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
@@ -524,7 +520,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
                  + "             background_popularity: 0.33333, },"
                  + "   }, "
                  + "   { val:'B', count:3, y:-3.0, z:-5, "
-                 + "     skg : { relatedness: 0.0, " // perfectly average and uncorrolated
+                 + "     skg : { relatedness: 0.0, " // perfectly average and uncorrelated
                  //+ "             foreground_count: 1, "
                  //+ "             foreground_size: 2, "
                  //+ "             background_count: 3, "
@@ -1003,11 +999,14 @@ public class TestJsonFacets extends SolrTestCaseHS {
     // test streaming
     assertJQ(req("q", "*:*", "rows", "0"
             , "json.facet", "{   cat:{terms:{field:'cat_s', method:stream }}" + // won't stream; need sort:index asc
-                              ", cat2:{terms:{field:'cat_s', method:stream, sort:'index asc' }}" +
-                              ", cat3:{terms:{field:'cat_s', method:stream, sort:'index asc', mincount:3 }}" + // mincount
-                              ", cat4:{terms:{field:'cat_s', method:stream, sort:'index asc', prefix:B }}" + // prefix
-                              ", cat5:{terms:{field:'cat_s', method:stream, sort:'index asc', offset:1 }}" + // offset
-                " }"
+            ", cat2:{terms:{field:'cat_s', method:stream, sort:'index asc' }}" +
+            ", cat3:{terms:{field:'cat_s', method:stream, sort:'index asc', mincount:3 }}" + // mincount
+            ", cat4:{terms:{field:'cat_s', method:stream, sort:'index asc', prefix:B }}" + // prefix
+            ", cat5:{terms:{field:'cat_s', method:stream, sort:'index asc', offset:1 }}" + // offset
+            ", cat6:{terms:{field:'cat_s', method:stream, sort:'index asc', missing:true }}" + // missing
+            ", cat7:{terms:{field:'cat_s', method:stream, sort:'index asc', numBuckets:true }}" + // numBuckets
+            ", cat8:{terms:{field:'cat_s', method:stream, sort:'index asc', allBuckets:true }}" + // allBuckets
+            " }"
         )
         , "facets=={count:6 " +
             ", cat :{buckets:[{val:B, count:3},{val:A, count:2}]}" +
@@ -1015,6 +1014,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
             ", cat3:{buckets:[{val:B, count:3}]}" +
             ", cat4:{buckets:[{val:B, count:3}]}" +
             ", cat5:{buckets:[{val:B, count:3}]}" +
+            ", cat6:{missing:{count:1}, buckets:[{val:A, count:2},{val:B, count:3}]}" +
+            ", cat7:{numBuckets:2, buckets:[{val:A, count:2},{val:B, count:3}]}" +
+            ", cat8:{allBuckets:{count:5}, buckets:[{val:A, count:2},{val:B, count:3}]}" +
             " }"
     );
 
diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc b/solr/solr-ref-guide/src/json-facet-api.adoc
index bc336c3..5e83807 100644
--- a/solr/solr-ref-guide/src/json-facet-api.adoc
+++ b/solr/solr-ref-guide/src/json-facet-api.adoc
@@ -220,7 +220,7 @@ This parameter indicates the facet algorithm to use:
 * "uif" UnInvertedField, collect into ordinal array
 * "dvhash" DocValues, collect into hash - improves efficiency over high cardinality fields
 * "enum" TermsEnum then intersect DocSet (stream-able)
-* "stream" Presently equivalent to "enum"
+* "stream" Presently equivalent to "enum" - used for indexed, non-point fields with sort 'index asc' and allBuckets, numBuckets, missing disabled.
 * "smart" Pick the best method for the field type (this is the default)
 
 |prelim_sort |An optional parameter for specifying an approximation of the final `sort` to use during initial collection of top buckets when the <<json-facet-api.adoc#sorting-facets-by-nested-functions,`sort` parameter is very costly>>.


[lucene-solr] 02/03: SOLR-12539: handle extra spaces in JSON facet shorthand syntax

Posted by mu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

munendrasn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit d7fd3d8c20e862048e749528f09fae8087a03509
Author: Munendra S N <mu...@apache.org>
AuthorDate: Wed Nov 25 12:21:19 2020 +0530

    SOLR-12539: handle extra spaces in JSON facet shorthand syntax
---
 solr/CHANGES.txt                                                   | 3 +++
 solr/core/src/java/org/apache/solr/search/facet/FacetParser.java   | 7 +++++--
 .../src/test/org/apache/solr/search/facet/RangeFacetCloudTest.java | 6 ------
 .../core/src/test/org/apache/solr/search/facet/TestJsonFacets.java | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e9d54f1..b6e74bb 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -307,6 +307,9 @@ Bug Fixes
   matches the dynamic rule. The copyFields are rebuilt with replace-field and replace-field-type, if required
   (Andrew Shumway, Munendra S N)
 
+* SOLR-12539: Handle parsing of values for 'other', 'include', and 'excludeTags' in JSON facets when specified as
+  comma-separated values with extra spaces (hossman, Munendra S N)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetParser.java b/solr/core/src/java/org/apache/solr/search/facet/FacetParser.java
index d8bb697..393dc59 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetParser.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetParser.java
@@ -20,6 +20,7 @@ import java.util.List;
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.Optional;
+import java.util.stream.Collectors;
 
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
@@ -388,8 +389,10 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
       return (List<String>)o;
     }
     if (o instanceof String) {
-      // TODO: SOLR-12539 handle spaces in b/w comma & value ie, should the values be trimmed before returning??
-      return StrUtils.splitSmart((String)o, ",", decode);
+      return StrUtils.splitSmart((String)o, ",", decode).stream()
+          .map(String::trim)
+          .filter(s -> !s.isEmpty())
+          .collect(Collectors.toList());
     }
 
     throw err("Expected list of string or comma separated string values for '" + paramName +
diff --git a/solr/core/src/test/org/apache/solr/search/facet/RangeFacetCloudTest.java b/solr/core/src/test/org/apache/solr/search/facet/RangeFacetCloudTest.java
index 009b864..bfc3a48 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/RangeFacetCloudTest.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/RangeFacetCloudTest.java
@@ -936,12 +936,6 @@ public class RangeFacetCloudTest extends SolrCloudTestCase {
       // - a JSON list of items (conveniently the default toString of EnumSet),
       // - a single quoted string containing the comma separated list
       val = val.replaceAll("\\[|\\]","'");
-
-      // HACK: work around SOLR-12539...
-      //
-      // when sending a single string containing a comma separated list of values, JSON Facets 'other'
-      // parsing can't handle any leading (or trailing?) whitespace
-      val = val.replaceAll("\\s","");
     }
     return ", other:" + val;
   }
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index bf622b4..c5a7a1d 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -2203,7 +2203,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
             , "json.facet", "{ processEmpty:true," +
                 " f1:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +
                 ",f2:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:abc }}" +
-                ",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz,abc,qaz' }}" +
+                ",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz ,abc ,qaz' }}" +
                 ",f4:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz , abc , qaz] }}" +
                 ",f5:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +    // this is repeated, but it did fail when a single context was shared among sub-facets
                 ",f6:{query:{q:'${cat_s}:B', facet:{processEmpty:true, nj:{query:'${where_s}:NJ'}, ny:{ type:query, q:'${where_s}:NY', excludeTags:abc}}  }}" +  // exclude in a sub-facet


[lucene-solr] 01/03: SOLR-14950: fix regenerating of copyfield with explicit src/dest matching dyn rule

Posted by mu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

munendrasn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 0846da5c22809a749d6dd858100df415eed544e4
Author: Munendra S N <mu...@apache.org>
AuthorDate: Wed Nov 25 08:52:38 2020 +0530

    SOLR-14950: fix regenerating of copyfield with explicit src/dest matching dyn rule
    
    CopyFields are regenerated in case of replace-field or replace-field-type.
    While regenerating, source and destination are checked against fields, but source/dest
    could match a dynamic rule too.
    For example,
    <copyField source="something_s" dest="spellcheck"/>
    <dynamicField name="*_s" type="string"/>
    here, something_s is not present in schema but matches the dynamic rule.
    
    To handle the above case, we need to check the dynamicFieldCache too while regenerating
    the copyFields.
---
 solr/CHANGES.txt                                   |   6 +-
 .../org/apache/solr/schema/ManagedIndexSchema.java |   5 +-
 .../apache/solr/rest/schema/TestBulkSchemaAPI.java | 102 +++++++++++++++++++++
 3 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index edc6b22..e9d54f1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -301,7 +301,11 @@ Bug Fixes
 
 * SOLR-15047: Fix collapse parser behavior when collapsing on numeric fields to differentiate '0' group from null group (hossman)
 
- * SOLR-10860: Return proper error code for bad input in case of inplace updates (Tomas Eduardo Fernandez Lobbe, Munendra S N)
+* SOLR-10860: Return proper error code for bad input in case of inplace updates (Tomas Eduardo Fernandez Lobbe, Munendra S N)
+
+* SOLR-14950: Fix error in copyField regeneration when explicit source/destination is not present in schema but
+  matches the dynamic rule. The copyFields are rebuilt with replace-field and replace-field-type, if required
+  (Andrew Shumway, Munendra S N)
 
 Other Changes
 ---------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java
index dd17c5b..ddabd25 100644
--- a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java
@@ -936,8 +936,9 @@ public final class ManagedIndexSchema extends IndexSchema {
   private void rebuildCopyFields(List<CopyField> oldCopyFields) {
     if (oldCopyFields.size() > 0) {
       for (CopyField copyField : oldCopyFields) {
-        SchemaField source = fields.get(copyField.getSource().getName());
-        SchemaField destination = fields.get(copyField.getDestination().getName());
+        // source or destination either could be explicit field which matches dynamic rule
+        SchemaField source = getFieldOrNull(copyField.getSource().getName());
+        SchemaField destination = getFieldOrNull(copyField.getDestination().getName());
         registerExplicitSrcAndDestFields
             (copyField.getSource().getName(), copyField.getMaxChars(), destination, source);
       }
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
index 6cef7cd..32795f4 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
@@ -773,6 +773,108 @@ public class TestBulkSchemaAPI extends RestTestBase {
     assertTrue("'bleh_s' copyField rule exists in the schema", l.isEmpty());
   }
 
+  @SuppressWarnings({"rawtypes"})
+  public void testCopyFieldWithReplace() throws Exception {
+    RestTestHarness harness = restTestHarness;
+    String newFieldName = "test_solr_14950";
+
+    // add-field-type
+    String addFieldTypeAnalyzer = "{\n" +
+        "'add-field-type' : {" +
+        "    'name' : 'myNewTextField',\n" +
+        "    'class':'solr.TextField',\n" +
+        "    'analyzer' : {\n" +
+        "        'charFilters' : [{\n" +
+        "                'name':'patternReplace',\n" +
+        "                'replacement':'$1$1',\n" +
+        "                'pattern':'([a-zA-Z])\\\\\\\\1+'\n" +
+        "            }],\n" +
+        "        'tokenizer' : { 'name':'whitespace' },\n" +
+        "        'filters' : [{ 'name':'asciiFolding' }]\n" +
+        "    }\n"+
+        "}}";
+
+    String response = restTestHarness.post("/schema", json(addFieldTypeAnalyzer));
+    Map map = (Map) fromJSONString(response);
+    assertNull(response, map.get("error"));
+    map = getObj(harness, "myNewTextField", "fieldTypes");
+    assertNotNull("'myNewTextField' field type does not exist in the schema", map);
+
+    // add-field
+    String payload = "{\n" +
+        "    'add-field' : {\n" +
+        "                 'name':'" + newFieldName + "',\n" +
+        "                 'type':'myNewTextField',\n" +
+        "                 'stored':true,\n" +
+        "                 'indexed':true\n" +
+        "                 }\n" +
+        "    }";
+
+    response = harness.post("/schema", json(payload));
+
+    map = (Map) fromJSONString(response);
+    assertNull(response, map.get("error"));
+
+    Map m = getObj(harness, newFieldName, "fields");
+    assertNotNull("'"+ newFieldName + "' field does not exist in the schema", m);
+
+    // add copy-field with explicit source and destination
+    List l = getSourceCopyFields(harness, "bleh_s");
+    assertTrue("'bleh_s' copyField rule exists in the schema", l.isEmpty());
+
+    payload = "{\n" +
+        "          'add-copy-field' : {\n" +
+        "                       'source' :'bleh_s',\n" +
+        "                       'dest':'"+ newFieldName + "'\n" +
+        "                       }\n" +
+        "          }\n";
+    response = harness.post("/schema", json(payload));
+
+    map = (Map) fromJSONString(response);
+    assertNull(response, map.get("error"));
+
+    l = getSourceCopyFields(harness, "bleh_s");
+    assertFalse("'bleh_s' copyField rule doesn't exist", l.isEmpty());
+    assertEquals("bleh_s", ((Map)l.get(0)).get("source"));
+    assertEquals(newFieldName, ((Map)l.get(0)).get("dest"));
+
+    // replace-field-type
+    String replaceFieldTypeAnalyzer = "{\n" +
+        "'replace-field-type' : {" +
+        "    'name' : 'myNewTextField',\n" +
+        "    'class':'solr.TextField',\n" +
+        "    'analyzer' : {\n" +
+        "        'tokenizer' : { 'name':'whitespace' },\n" +
+        "        'filters' : [{ 'name':'asciiFolding' }]\n" +
+        "    }\n"+
+        "}}";
+
+    response = restTestHarness.post("/schema", json(replaceFieldTypeAnalyzer));
+    map = (Map) fromJSONString(response);
+    assertNull(response, map.get("error"));
+
+    map = getObj(restTestHarness, "myNewTextField", "fieldTypes");
+    assertNotNull(map);
+    Map analyzer = (Map)map.get("analyzer");
+    assertNull("'myNewTextField' shouldn't contain charFilters", analyzer.get("charFilters"));
+
+    l = getSourceCopyFields(harness, "bleh_s");
+    assertFalse("'bleh_s' copyField rule doesn't exist", l.isEmpty());
+    assertEquals("bleh_s", ((Map)l.get(0)).get("source"));
+    assertEquals(newFieldName, ((Map)l.get(0)).get("dest"));
+
+    // with replace-field
+    String replaceField = "{'replace-field' : {'name':'" + newFieldName + "', 'type':'string'}}";
+    response = harness.post("/schema", json(replaceField));
+    map = (Map) fromJSONString(response);
+    assertNull(map.get("error"));
+
+    l = getSourceCopyFields(harness, "bleh_s");
+    assertFalse("'bleh_s' copyField rule doesn't exist", l.isEmpty());
+    assertEquals("bleh_s", ((Map)l.get(0)).get("source"));
+    assertEquals(newFieldName, ((Map)l.get(0)).get("dest"));
+  }
+
   @SuppressWarnings({"unchecked", "rawtypes"})
   public void testDeleteAndReplace() throws Exception {
     RestTestHarness harness = restTestHarness;