You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2019/02/22 19:04:36 UTC
[lucene-solr] branch master updated: SOLR-13261: Make
SortableTextField work with export/streaming
This is an automated email from the ASF dual-hosted git repository.
erick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 6b4e906 SOLR-13261: Make SortableTextField work with export/streaming
6b4e906 is described below
commit 6b4e90617ddb5a9897070bc60e2c6e78d8488f12
Author: Erick Erickson <Er...@gmail.com>
AuthorDate: Fri Feb 22 11:04:31 2019 -0800
SOLR-13261: Make SortableTextField work with export/streaming
---
solr/CHANGES.txt | 2 +
.../apache/solr/handler/export/ExportWriter.java | 9 +++--
.../collection1/conf/schema-sortingresponse.xml | 25 +++++++++++-
.../configsets/cloud-managed/conf/managed-schema | 12 ++++++
.../apache/solr/cloud/DocValuesNotIndexedTest.java | 44 ++++++++++++----------
.../solr/handler/export/TestExportWriter.java | 32 ++++++++++++++--
6 files changed, 97 insertions(+), 27 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bc5fdb2..2a763ff 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -58,6 +58,8 @@ New Features
* SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble)
+* SOLR-13261: Make SortableTextField work with export/streaming
+
Bug Fixes
----------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
index 2c1ab96..c80cae3 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
@@ -58,6 +58,7 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IntValueFieldType;
import org.apache.solr.schema.LongValueFieldType;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.SortableTextField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
@@ -358,7 +359,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} else {
writers[i] = new DoubleFieldWriter(field);
}
- } else if (fieldType instanceof StrField) {
+ } else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else {
@@ -377,7 +378,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
writers[i] = new BoolFieldWriter(field, fieldType);
}
} else {
- throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
+ throw new IOException("Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
}
}
return writers;
@@ -421,7 +422,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} else {
sortValues[i] = new LongValue(field, new LongAsc());
}
- } else if (ft instanceof StrField) {
+ } else if (ft instanceof StrField || ft instanceof SortableTextField) {
LeafReader reader = searcher.getSlowAtomicReader();
SortedDocValues vals = reader.getSortedDocValues(field);
if (reverse) {
@@ -447,7 +448,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
sortValues[i] = new StringValue(vals, field, new IntAsc());
}
} else {
- throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean");
+ throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
}
}
//SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml
index 57e684d..281e698 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml
@@ -26,7 +26,7 @@
seconds part (.999) is optional.
-->
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
-
+
<!-- Point Fields -->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
@@ -40,6 +40,27 @@
<fieldType name="uuid" class="solr.UUIDField"/>
+
+ <!-- SortableTextField generally functions exactly like TextField,
+ except that it supports, and by default uses, docValues for sorting (or faceting)
+ on the first 1024 characters of the original field values (which is configurable).
+
+ This makes it a bit more useful than TextField in many situations, but the trade-off
+ is that it takes up more space on disk; which is why it's not used in place of TextField
+ for every fieldType in this _default schema.
+-->
+ <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+
<field name="id" type="string" required="true" indexed="true" docValues="true"/>
<field name="floatdv_m" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="intdv_m" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
@@ -47,6 +68,7 @@
<field name="longdv_m" type="long" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="datedv_m" type="date" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="stringdv_m" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
+ <field name="sortabledv_m" type="text_gen_sort" indexed="true" stored="true" multiValued="true" />
<field name="floatdv" type="float" indexed="false" stored="false" docValues="true"/>
<field name="intdv" type="int" indexed="false" stored="false" docValues="true"/>
@@ -55,6 +77,7 @@
<field name="datedv" type="date" indexed="false" stored="false" docValues="true"/>
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true"/>
<field name="booleandv" type="boolean" indexed="false" stored="false" docValues="true" />
+ <field name="sortabledv" type="text_gen_sort" indexed="true" stored="true" multiValued="false" />
<dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="false"/>
diff --git a/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema b/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema
index 7ce25e9..455cb55 100644
--- a/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema
+++ b/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema
@@ -23,5 +23,17 @@
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="id" type="string" indexed="true" stored="true"/>
+
+ <fieldType name="sortabletext" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
<uniqueKey>id</uniqueKey>
</schema>
diff --git a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
index f396a5d..5620831 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
@@ -98,7 +98,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatField", "float", 1),
new FieldProps("dateField", "date", 1),
new FieldProps("stringField", "string", 1),
- new FieldProps("boolField", "boolean", 1)
+ new FieldProps("boolField", "boolean", 1),
+ new FieldProps("sortableText", "sortabletext", 1)
));
fieldsToTestMulti =
@@ -109,10 +110,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatFieldMulti", "float", 5),
new FieldProps("dateFieldMulti", "date", 5),
new FieldProps("stringFieldMulti", "string", 5),
- new FieldProps("boolFieldMulti", "boolean", 2)
+ new FieldProps("boolFieldMulti", "boolean", 2),
+ new FieldProps("sortableFieldMulti", "sortabletext", 5)
));
- // Fields to test for grouping and sorting with sortMinssingFirst/Last.
+ // Fields to test for grouping and sorting with sortMissingFirst/Last.
fieldsToTestGroupSortFirst =
Collections.unmodifiableList(Arrays.asList(
new FieldProps("intGSF", "int"),
@@ -121,7 +123,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatGSF", "float"),
new FieldProps("dateGSF", "date"),
new FieldProps("stringGSF", "string"),
- new FieldProps("boolGSF", "boolean")
+ new FieldProps("boolGSF", "boolean"),
+ new FieldProps("sortableGSF", "sortabletext")
));
fieldsToTestGroupSortLast =
@@ -132,7 +135,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatGSL", "float"),
new FieldProps("dateGSL", "date"),
new FieldProps("stringGSL", "string"),
- new FieldProps("boolGSL", "boolean")
+ new FieldProps("boolGSL", "boolean"),
+ new FieldProps("sortableGSL", "sortabletext")
));
List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
@@ -210,18 +214,18 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
for (FieldProps props : fieldsToTestSingle) {
- testFacet(props, rsp);
+ doTestFacet(props, rsp);
}
for (FieldProps props : fieldsToTestMulti) {
- testFacet(props, rsp);
+ doTestFacet(props, rsp);
}
}
// We should be able to sort thing with missing first/last and that are _NOT_ present at all on one server.
@Test
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
+ //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
public void testGroupingSorting() throws IOException, SolrServerException {
CloudSolrClient client = cluster.getSolrClient();
@@ -314,12 +318,17 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
// 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
// commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
public void testGroupingDVOnly() throws IOException, SolrServerException {
+ doGroupingDvOnly(fieldsToTestGroupSortFirst, "boolGSF");
+ doGroupingDvOnly(fieldsToTestGroupSortLast, "boolGSL");
+ }
+ private void doGroupingDvOnly(List<FieldProps> fieldProps, String boolName) throws IOException, SolrServerException {
+
List<SolrInputDocument> docs = new ArrayList<>(50);
for (int idx = 0; idx < 49; ++idx) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", idx);
boolean doInc = ((idx % 7) == 0);
- for (FieldProps prop : fieldsToTestGroupSortFirst) {
+ for (FieldProps prop : fieldProps) {
doc.addField(prop.getName(), prop.getValue(doInc));
}
docs.add(doc);
@@ -337,13 +346,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
.commit(client, COLLECTION);
// OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7
- for (FieldProps prop : fieldsToTestGroupSortFirst) {
- // Special handling until SOLR-9802 is fixed
+ for (FieldProps prop : fieldProps) {
+
+ // Skip date fields until SOLR-9802 is fixed
if (prop.getName().startsWith("date")) continue;
- // SOLR-9802 to here
-
- // TODO: gsf fails this
- if (prop.getName().endsWith("GSF") ) continue;
final SolrQuery solrQuery = new SolrQuery(
"q", "*:*",
@@ -376,7 +382,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
case 25:
case 24:
++boolCount;
- assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName());
+ assertEquals("We should have more counts for boolean fields!", boolName, prop.getName());
break;
default:
@@ -442,7 +448,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
}
- private void testFacet(FieldProps props, QueryResponse rsp) {
+ private void doTestFacet(FieldProps props, QueryResponse rsp) {
String name = props.getName();
final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
long expectedCount = props.getExpectedCount();
@@ -483,7 +489,7 @@ class FieldProps {
base = Math.abs(random().nextLong());
} else if (name.startsWith("bool")) {
base = true; // Must start with a known value since bools only have a two values....
- } else if (name.startsWith("string")) {
+ } else if (name.startsWith("string") || name.startsWith("sortable")) {
base = "base_string_" + random().nextInt(1_000_000) + "_";
} else {
throw new RuntimeException("Should have found a prefix for the field before now!");
@@ -531,7 +537,7 @@ class FieldProps {
base = !((boolean) base);
return ret;
}
- if (name.startsWith("string")) {
+ if (name.startsWith("string") || name.startsWith("sortable")) {
return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
}
throw new RuntimeException("Should have found a prefix for the field before now!");
diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java
index f28f7dc..4cebb12 100644
--- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java
+++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java
@@ -129,7 +129,10 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"datedv_m", "2017-06-16T01:00:00Z",
"datedv_m", "2017-06-16T02:00:00Z",
"datedv_m", "2017-06-16T03:00:00Z",
- "datedv_m", "2017-06-16T04:00:00Z"));
+ "datedv_m", "2017-06-16T04:00:00Z",
+ "sortabledv_m", "this is some text one_1",
+ "sortabledv_m", "this is some text two_1",
+ "sortabledv_m", "this is some text three_1"));
assertU(adoc("id","7",
"floatdv","2.1",
@@ -166,7 +169,8 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"int_is_t", "1",
"int_is_t", "1",
"int_is_t", "1",
- "int_is_t", "1"));
+ "int_is_t", "1",
+ "sortabledv", "this is some text_1"));
assertU(commit());
assertU(adoc("id","8",
"floatdv","2.1",
@@ -191,7 +195,11 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"int_is_p", "1",
"int_is_p", "1",
"int_is_p", "1",
- "int_is_p", "1"));
+ "int_is_p", "1",
+ "sortabledv", "this is some text_2",
+ "sortabledv_m", "this is some text one_2",
+ "sortabledv_m", "this is some text two_2",
+ "sortabledv_m", "this is some text three_2"));
assertU(commit());
@@ -491,6 +499,24 @@ public class TestExportWriter extends SolrTestCaseJ4 {
s = h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc"));
assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}");
+
+ // Test sortable text fields:
+ s = h.query(req("q", "id:(1 OR 3 OR 8)", "qt", "/export", "fl", "sortabledv_m,sortabledv", "sort", "sortabledv asc"));
+ assertJsonEquals(s, "{\n" +
+ " \"responseHeader\":{\"status\":0},\n" +
+ " \"response\":{\n" +
+ " \"numFound\":3,\n" +
+ " \"docs\":[{\n" +
+ " \"sortabledv_m\":[\"this is some text one_1\"\n" +
+ " ,\"this is some text three_1\"\n" +
+ " ,\"this is some text two_1\"]}\n" +
+ " ,{\n" +
+ " \"sortabledv\":\"this is some text_1\"}\n" +
+ " ,{\n" +
+ " \"sortabledv_m\":[\"this is some text one_2\"\n" +
+ " ,\"this is some text three_2\"\n" +
+ " ,\"this is some text two_2\"],\n" +
+ " \"sortabledv\":\"this is some text_2\"}]}}");
}
private void assertJsonEquals(String actual, String expected) {