You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2019/02/22 19:04:36 UTC

[lucene-solr] branch master updated: SOLR-13261: Make SortableTextField work with export/streaming

This is an automated email from the ASF dual-hosted git repository.

erick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b4e906  SOLR-13261: Make SortableTextField work with export/streaming
6b4e906 is described below

commit 6b4e90617ddb5a9897070bc60e2c6e78d8488f12
Author: Erick Erickson <Er...@gmail.com>
AuthorDate: Fri Feb 22 11:04:31 2019 -0800

    SOLR-13261: Make SortableTextField work with export/streaming
---
 solr/CHANGES.txt                                   |  2 +
 .../apache/solr/handler/export/ExportWriter.java   |  9 +++--
 .../collection1/conf/schema-sortingresponse.xml    | 25 +++++++++++-
 .../configsets/cloud-managed/conf/managed-schema   | 12 ++++++
 .../apache/solr/cloud/DocValuesNotIndexedTest.java | 44 ++++++++++++----------
 .../solr/handler/export/TestExportWriter.java      | 32 ++++++++++++++--
 6 files changed, 97 insertions(+), 27 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bc5fdb2..2a763ff 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -58,6 +58,8 @@ New Features
 
 * SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble)
 
+* SOLR-13261: Make SortableTextField work with export/streaming
+
 Bug Fixes
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
index 2c1ab96..c80cae3 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
@@ -58,6 +58,7 @@ import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IntValueFieldType;
 import org.apache.solr.schema.LongValueFieldType;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.SortableTextField;
 import org.apache.solr.schema.StrField;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SortSpec;
@@ -358,7 +359,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
         } else {
           writers[i] = new DoubleFieldWriter(field);
         }
-      } else if (fieldType instanceof StrField) {
+      } else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) {
         if (multiValued) {
           writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
         } else {
@@ -377,7 +378,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
           writers[i] = new BoolFieldWriter(field, fieldType);
         }
       } else {
-        throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
+        throw new IOException("Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
       }
     }
     return writers;
@@ -421,7 +422,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
         } else {
           sortValues[i] = new LongValue(field, new LongAsc());
         }
-      } else if (ft instanceof StrField) {
+      } else if (ft instanceof StrField || ft instanceof SortableTextField) {
         LeafReader reader = searcher.getSlowAtomicReader();
         SortedDocValues vals = reader.getSortedDocValues(field);
         if (reverse) {
@@ -447,7 +448,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
           sortValues[i] = new StringValue(vals, field, new IntAsc());
         }
       } else {
-        throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean");
+        throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
       }
     }
     //SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml
index 57e684d..281e698 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml
@@ -26,7 +26,7 @@
        seconds part (.999) is optional.
     -->
   <fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
-  
+
   <!-- Point Fields -->
   <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
   <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
@@ -40,6 +40,27 @@
 
   <fieldType name="uuid" class="solr.UUIDField"/>
 
+
+  <!-- SortableTextField generally functions exactly like TextField,
+     except that it supports, and by default uses, docValues for sorting (or faceting)
+     on the first 1024 characters of the original field values (which is configurable).
+
+     This makes it a bit more useful than TextField in many situations, but the trade-off
+     is that it takes up more space on disk; which is why it's not used in place of TextField
+     for every fieldType in this schema.
+-->
+  <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
+    <analyzer type="index">
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+    <analyzer type="query">
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+
   <field name="id" type="string" required="true" indexed="true" docValues="true"/>
   <field name="floatdv_m" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
   <field name="intdv_m" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
@@ -47,6 +68,7 @@
   <field name="longdv_m" type="long" indexed="false" stored="false" docValues="true" multiValued="true"/>
   <field name="datedv_m" type="date" indexed="false" stored="false" docValues="true" multiValued="true"/>
   <field name="stringdv_m" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
+  <field name="sortabledv_m" type="text_gen_sort" indexed="true" stored="true" multiValued="true" />
 
   <field name="floatdv" type="float" indexed="false" stored="false" docValues="true"/>
   <field name="intdv" type="int" indexed="false" stored="false" docValues="true"/>
@@ -55,6 +77,7 @@
   <field name="datedv" type="date" indexed="false" stored="false" docValues="true"/>
   <field name="stringdv" type="string" indexed="false" stored="false" docValues="true"/>
   <field name="booleandv" type="boolean" indexed="false" stored="false" docValues="true" />
+  <field name="sortabledv" type="text_gen_sort" indexed="true" stored="true" multiValued="false" />
 
   <dynamicField name="*_s_dv"   type="string"    indexed="true"  stored="true" docValues="true" multiValued="false"/>
 
diff --git a/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema b/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema
index 7ce25e9..455cb55 100644
--- a/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema
+++ b/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema
@@ -23,5 +23,17 @@
   <field name="_version_" type="long" indexed="true" stored="true"/>
   <field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
   <field name="id" type="string" indexed="true" stored="true"/>
+
+  <fieldType name="sortabletext" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
+    <analyzer type="index">
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+    <analyzer type="query">
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
   <uniqueKey>id</uniqueKey>
 </schema>
diff --git a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
index f396a5d..5620831 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
@@ -98,7 +98,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("floatField", "float", 1),
             new FieldProps("dateField", "date", 1),
             new FieldProps("stringField", "string", 1),
-            new FieldProps("boolField", "boolean", 1)
+            new FieldProps("boolField", "boolean", 1),
+            new FieldProps("sortableText", "sortabletext", 1)
         ));
 
     fieldsToTestMulti =
@@ -109,10 +110,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("floatFieldMulti", "float", 5),
             new FieldProps("dateFieldMulti", "date", 5),
             new FieldProps("stringFieldMulti", "string", 5),
-            new FieldProps("boolFieldMulti", "boolean", 2)
+            new FieldProps("boolFieldMulti", "boolean", 2),
+            new FieldProps("sortableFieldMulti", "sortabletext", 5)
         ));
 
-    // Fields to test for grouping and sorting with sortMinssingFirst/Last.
+    // Fields to test for grouping and sorting with sortMissingFirst/Last.
     fieldsToTestGroupSortFirst =
         Collections.unmodifiableList(Arrays.asList(
             new FieldProps("intGSF", "int"),
@@ -121,7 +123,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("floatGSF", "float"),
             new FieldProps("dateGSF", "date"),
             new FieldProps("stringGSF", "string"),
-            new FieldProps("boolGSF", "boolean")
+            new FieldProps("boolGSF", "boolean"),
+            new FieldProps("sortableGSF", "sortabletext")
         ));
 
     fieldsToTestGroupSortLast =
@@ -132,7 +135,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("floatGSL", "float"),
             new FieldProps("dateGSL", "date"),
             new FieldProps("stringGSL", "string"),
-            new FieldProps("boolGSL", "boolean")
+            new FieldProps("boolGSL", "boolean"),
+            new FieldProps("sortableGSL", "sortabletext")
         ));
 
     List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
@@ -210,18 +214,18 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
     final QueryResponse rsp = client.query(COLLECTION, solrQuery);
 
     for (FieldProps props : fieldsToTestSingle) {
-      testFacet(props, rsp);
+      doTestFacet(props, rsp);
     }
 
     for (FieldProps props : fieldsToTestMulti) {
-      testFacet(props, rsp);
+      doTestFacet(props, rsp);
     }
 
   }
 
   // We should be able to sort thing with missing first/last and that are _NOT_ present at all on one server.
   @Test
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
+  //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
   public void testGroupingSorting() throws IOException, SolrServerException {
     CloudSolrClient client = cluster.getSolrClient();
 
@@ -314,12 +318,17 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
   // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
   // commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
   public void testGroupingDVOnly() throws IOException, SolrServerException {
+    doGroupingDvOnly(fieldsToTestGroupSortFirst, "boolGSF");
+    doGroupingDvOnly(fieldsToTestGroupSortLast, "boolGSL");
+  }
+  private void doGroupingDvOnly(List<FieldProps> fieldProps, String boolName) throws IOException, SolrServerException {
+
     List<SolrInputDocument> docs = new ArrayList<>(50);
     for (int idx = 0; idx < 49; ++idx) {
       SolrInputDocument doc = new SolrInputDocument();
       doc.addField("id", idx);
       boolean doInc = ((idx % 7) == 0);
-      for (FieldProps prop : fieldsToTestGroupSortFirst) {
+      for (FieldProps prop : fieldProps) {
         doc.addField(prop.getName(), prop.getValue(doInc));
       }
       docs.add(doc);
@@ -337,13 +346,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
         .commit(client, COLLECTION);
 
     // OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7
-    for (FieldProps prop : fieldsToTestGroupSortFirst) {
-      // Special handling until SOLR-9802 is fixed
+    for (FieldProps prop : fieldProps) {
+
+      // Skip date fields until SOLR-9802 is fixed
       if (prop.getName().startsWith("date")) continue;
-      // SOLR-9802 to here
-      
-      // TODO: gsf fails this
-      if (prop.getName().endsWith("GSF") ) continue;
 
       final SolrQuery solrQuery = new SolrQuery(
           "q", "*:*",
@@ -376,7 +382,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             case 25:
             case 24:
               ++boolCount;
-              assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName());
+              assertEquals("We should have more counts for boolean fields!", boolName, prop.getName());
               break;
             
             default:
@@ -442,7 +448,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
   }
 
 
-  private void testFacet(FieldProps props, QueryResponse rsp) {
+  private void doTestFacet(FieldProps props, QueryResponse rsp) {
     String name = props.getName();
     final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
     long expectedCount = props.getExpectedCount();
@@ -483,7 +489,7 @@ class FieldProps {
       base = Math.abs(random().nextLong());
     } else if (name.startsWith("bool")) {
       base = true; // Must start with a known value since bools only have a two values....
-    } else if (name.startsWith("string")) {
+    } else if (name.startsWith("string") || name.startsWith("sortable")) {
       base = "base_string_" + random().nextInt(1_000_000) + "_";
     } else {
       throw new RuntimeException("Should have found a prefix for the field before now!");
@@ -531,7 +537,7 @@ class FieldProps {
       base = !((boolean) base);
       return ret;
     }
-    if (name.startsWith("string")) {
+    if (name.startsWith("string") || name.startsWith("sortable")) {
       return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
     }
     throw new RuntimeException("Should have found a prefix for the field before now!");
diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java
index f28f7dc..4cebb12 100644
--- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java
+++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java
@@ -129,7 +129,10 @@ public class TestExportWriter extends SolrTestCaseJ4 {
                  "datedv_m", "2017-06-16T01:00:00Z",
                  "datedv_m", "2017-06-16T02:00:00Z",
                  "datedv_m", "2017-06-16T03:00:00Z",
-                 "datedv_m", "2017-06-16T04:00:00Z"));
+                 "datedv_m", "2017-06-16T04:00:00Z",
+                 "sortabledv_m", "this is some text one_1",
+                 "sortabledv_m", "this is some text two_1",
+                 "sortabledv_m", "this is some text three_1"));
 
     assertU(adoc("id","7",
         "floatdv","2.1",
@@ -166,7 +169,8 @@ public class TestExportWriter extends SolrTestCaseJ4 {
         "int_is_t", "1",
         "int_is_t", "1",
         "int_is_t", "1",
-        "int_is_t", "1"));
+        "int_is_t", "1",
+        "sortabledv", "this is some text_1"));
     assertU(commit());
     assertU(adoc("id","8",
         "floatdv","2.1",
@@ -191,7 +195,11 @@ public class TestExportWriter extends SolrTestCaseJ4 {
         "int_is_p", "1",
         "int_is_p", "1",
         "int_is_p", "1",
-        "int_is_p", "1"));
+        "int_is_p", "1",
+        "sortabledv", "this is some text_2",
+        "sortabledv_m", "this is some text one_2",
+        "sortabledv_m", "this is some text two_2",
+        "sortabledv_m", "this is some text three_2"));
     assertU(commit());
 
 
@@ -491,6 +499,24 @@ public class TestExportWriter extends SolrTestCaseJ4 {
 
     s =  h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc"));
     assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}");
+
+    // Test sortable text fields:
+    s =  h.query(req("q", "id:(1 OR 3 OR 8)", "qt", "/export", "fl", "sortabledv_m,sortabledv", "sort", "sortabledv asc"));
+    assertJsonEquals(s, "{\n" +
+        "  \"responseHeader\":{\"status\":0},\n" +
+        "  \"response\":{\n" +
+        "    \"numFound\":3,\n" +
+        "    \"docs\":[{\n" +
+        "        \"sortabledv_m\":[\"this is some text one_1\"\n" +
+        "          ,\"this is some text three_1\"\n" +
+        "          ,\"this is some text two_1\"]}\n" +
+        "      ,{\n" +
+        "        \"sortabledv\":\"this is some text_1\"}\n" +
+        "      ,{\n" +
+        "        \"sortabledv_m\":[\"this is some text one_2\"\n" +
+        "          ,\"this is some text three_2\"\n" +
+        "          ,\"this is some text two_2\"],\n" +
+        "        \"sortabledv\":\"this is some text_2\"}]}}");
   }
 
   private void assertJsonEquals(String actual, String expected) {