You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by th...@apache.org on 2021/06/29 23:05:12 UTC

[solr] branch main updated: SOLR-15489: Implement OFFSET & FETCH for LIMIT SQL queries (#191)

This is an automated email from the ASF dual-hosted git repository.

thelabdude pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 0907fff  SOLR-15489: Implement OFFSET & FETCH for LIMIT SQL queries (#191)
0907fff is described below

commit 0907fff65cb236897731972da49b7118a8c40a09
Author: Timothy Potter <th...@gmail.com>
AuthorDate: Tue Jun 29 17:03:38 2021 -0600

    SOLR-15489: Implement OFFSET & FETCH for LIMIT SQL queries (#191)
---
 solr/CHANGES.txt                                   |  2 +
 .../org/apache/solr/handler/sql/LimitStream.java   | 23 +++++-
 .../org/apache/solr/handler/sql/SolrMethod.java    |  1 +
 .../java/org/apache/solr/handler/sql/SolrRel.java  |  5 ++
 .../java/org/apache/solr/handler/sql/SolrSort.java |  6 +-
 .../org/apache/solr/handler/sql/SolrTable.java     | 44 ++++++++----
 .../handler/sql/SolrToEnumerableConverter.java     |  3 +-
 solr/core/src/test-files/log4j2.xml                |  2 +-
 .../org/apache/solr/handler/TestSQLHandler.java    | 84 ++++++++++++++++++----
 .../solr-ref-guide/src/parallel-sql-interface.adoc | 14 ++++
 10 files changed, 153 insertions(+), 31 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3c9269f..47389a4 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -151,6 +151,8 @@ when told to. The admin UI now tells it to. (Nazerke Seidan, David Smiley)
 
 * SOLR-15456: Get field type info from luke for custom fields instead of defaulting to String in Parallel SQL (Timothy Potter)
 
+* SOLR-15489: Implement OFFSET & FETCH for LIMIT SQL queries (Timothy Potter)
+
 Other Changes
 ----------------------
 * SOLR-14656: Autoscaling framework removed (Ishan Chattopadhyaya, noble, Ilan Ginzburg)
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/LimitStream.java b/solr/core/src/java/org/apache/solr/handler/sql/LimitStream.java
index 772f639..5bdf3e7 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/LimitStream.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/LimitStream.java
@@ -32,11 +32,18 @@ class LimitStream extends TupleStream {
 
   private final TupleStream stream;
   private final int limit;
+  private final int offset;
   private int count;
 
   LimitStream(TupleStream stream, int limit) {
+    this(stream, limit, 0);
+  }
+
+  LimitStream(TupleStream stream, int limit, int offset) {
     this.stream = stream;
     this.limit = limit;
+    this.offset = offset > 0 ? offset : 0;
+    this.count = 0;
   }
 
   public void open() throws IOException {
@@ -75,8 +82,20 @@ class LimitStream extends TupleStream {
   }
 
   public Tuple read() throws IOException {
-    ++count;
-    if(count > limit) {
+
+    if (count == 0 && offset > 0) {
+      // skip offset # of sorted tuples (indexes 0 to offset-1) so that the first tuple returned is the one at the offset position
+      while (count < offset) {
+        ++count; // don't increment until after the compare ...
+        Tuple skip = stream.read();
+        if (skip.EOF) {
+          return skip;
+        }
+      }
+    }
+
+    // done once we've reached the tuple after limit + offset
+    if (++count > (limit + offset)) {
       return Tuple.EOF();
     }
 
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/SolrMethod.java b/solr/core/src/java/org/apache/solr/handler/sql/SolrMethod.java
index b130fdf..6604822 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/SolrMethod.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/SolrMethod.java
@@ -34,6 +34,7 @@ enum SolrMethod {
                        List.class,
                        String.class,
                        String.class,
+                       String.class,
                        String.class);
 
   public final Method method;
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/SolrRel.java b/solr/core/src/java/org/apache/solr/handler/sql/SolrRel.java
index 370de16..3467e4a 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/SolrRel.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/SolrRel.java
@@ -40,6 +40,7 @@ interface SolrRel extends RelNode {
     String havingPredicate;
     boolean negativeQuery;
     String limitValue = null;
+    String offsetValue = null;
     final List<Pair<String, String>> orders = new ArrayList<>();
     final List<String> buckets = new ArrayList<>();
     final List<Pair<String, String>> metricPairs = new ArrayList<>();
@@ -98,6 +99,10 @@ interface SolrRel extends RelNode {
       limitValue = limit;
     }
 
+    void setOffset(String offset) {
+      this.offsetValue = offset;
+    }
+
     void visitChild(int ordinal, RelNode input) {
       assert ordinal == 0;
       ((SolrRel) input).implement(this);
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/SolrSort.java b/solr/core/src/java/org/apache/solr/handler/sql/SolrSort.java
index 1c5274a..ca161ce 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/SolrSort.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/SolrSort.java
@@ -72,8 +72,12 @@ class SolrSort extends Sort implements SolrRel {
     }
 
 
-    if(fetch != null) {
+    if (fetch != null) {
       implementor.setLimit(((RexLiteral) fetch).getValue().toString());
     }
+
+    if (offset != null && offset instanceof RexLiteral) {
+      implementor.setOffset(((RexLiteral) offset).getValue2().toString());
+    }
   }
 }
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/SolrTable.java b/solr/core/src/java/org/apache/solr/handler/sql/SolrTable.java
index dcc1302..b3b0d56 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/SolrTable.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/SolrTable.java
@@ -86,7 +86,7 @@ class SolrTable extends AbstractQueryableTable implements TranslatableTable {
   
   private Enumerable<Object> query(final Properties properties) {
     return query(properties, Collections.emptyList(), null, Collections.emptyList(), Collections.emptyList(),
-        Collections.emptyList(), null, null, null);
+        Collections.emptyList(), null, null, null, null);
   }
 
   /** Executes a Solr query on the underlying table.
@@ -104,7 +104,8 @@ class SolrTable extends AbstractQueryableTable implements TranslatableTable {
                                    final List<Pair<String, String>> metricPairs,
                                    final String limit,
                                    final String negativeQuery,
-                                   final String havingPredicate) {
+                                   final String havingPredicate,
+                                   final String offset) {
     // SolrParams should be a ModifiableParams instead of a map
     boolean mapReduce = "map_reduce".equals(properties.getProperty("aggregationMode"));
     boolean negative = Boolean.parseBoolean(negativeQuery);
@@ -125,7 +126,7 @@ class SolrTable extends AbstractQueryableTable implements TranslatableTable {
     String zk = properties.getProperty("zk");
     try {
       if (metricPairs.isEmpty() && buckets.isEmpty()) {
-        tupleStream = handleSelect(zk, collection, q, fields, orders, limit);
+        tupleStream = handleSelect(zk, collection, q, fields, orders, limit, offset);
       } else {
         if(buckets.isEmpty()) {
           tupleStream = handleStats(zk, collection, q, metricPairs, fields);
@@ -256,7 +257,8 @@ class SolrTable extends AbstractQueryableTable implements TranslatableTable {
                                    String query,
                                    List<Map.Entry<String, Class<?>>> fields,
                                    List<Pair<String, String>> orders,
-                                   String limit) throws IOException {
+                                   String limit,
+                                   String offset) throws IOException {
 
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CommonParams.Q, query);
@@ -275,25 +277,43 @@ class SolrTable extends AbstractQueryableTable implements TranslatableTable {
 
     String fl = getFields(fields);
 
-    if(orders.size() > 0) {
+    if (!orders.isEmpty()) {
       params.add(SORT, getSort(orders));
     } else {
-      if(limit == null) {
+      if (limit == null) {
         params.add(SORT, "_version_ desc");
         fl = fl+",_version_";
       } else {
         params.add(SORT, "score desc");
-        if(fl.indexOf("score") == -1) {
+        if (!fl.contains("score")) {
           fl = fl + ",score";
         }
       }
     }
 
     params.add(CommonParams.FL, fl);
-
+    
+    if (offset != null && limit == null) {
+      throw new IOException("OFFSET without LIMIT not supported by Solr! Specify desired limit using 'FETCH NEXT <LIMIT> ROWS ONLY'");
+    }
+    
     if (limit != null) {
-      params.add(CommonParams.ROWS, limit);
-      return new LimitStream(new CloudSolrStream(zk, collection, params), Integer.parseInt(limit));
+      int limitInt = Integer.parseInt(limit);
+      // if there's an offset, then we need to fetch offset + limit rows from each shard and then sort accordingly
+      LimitStream limitStream;
+      if (offset != null) {
+        int offsetInt = Integer.parseInt(offset);
+        int rows = limitInt + offsetInt;
+        params.add(CommonParams.START, "0"); // tricky ... we need all rows up to limit + offset
+        params.add(CommonParams.ROWS, String.valueOf(rows));
+        // re-sort all the streams back from the shards
+        StreamComparator streamSorter = new MultipleFieldComparator(getComps(orders));
+        limitStream = new LimitStream(new SortStream(new CloudSolrStream(zk, collection, params), streamSorter), limitInt, offsetInt);
+      } else {
+        params.add(CommonParams.ROWS, limit);
+        limitStream = new LimitStream(new CloudSolrStream(zk, collection, params), limitInt);
+      }
+      return limitStream;
     } else {
       params.add(CommonParams.QT, "/export");
       return new CloudSolrStream(zk, collection, params);
@@ -856,8 +876,8 @@ class SolrTable extends AbstractQueryableTable implements TranslatableTable {
      */
     @SuppressWarnings({"UnusedDeclaration"})
     public Enumerable<Object> query(List<Map.Entry<String, Class<?>>> fields, String query, List<Pair<String, String>> order,
-                                    List<String> buckets, List<Pair<String, String>> metricPairs, String limit, String negativeQuery, String havingPredicate) {
-      return getTable().query(getProperties(), fields, query, order, buckets, metricPairs, limit, negativeQuery, havingPredicate);
+                                    List<String> buckets, List<Pair<String, String>> metricPairs, String limit, String negativeQuery, String havingPredicate, String offset) {
+      return getTable().query(getProperties(), fields, query, order, buckets, metricPairs, limit, negativeQuery, havingPredicate, offset);
     }
   }
 
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/SolrToEnumerableConverter.java b/solr/core/src/java/org/apache/solr/handler/sql/SolrToEnumerableConverter.java
index 45f131c..acea8bd 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/SolrToEnumerableConverter.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/SolrToEnumerableConverter.java
@@ -85,8 +85,9 @@ class SolrToEnumerableConverter extends ConverterImpl implements EnumerableRel {
     final Expression limit = list.append("limit", Expressions.constant(solrImplementor.limitValue));
     final Expression negativeQuery = list.append("negativeQuery", Expressions.constant(Boolean.toString(solrImplementor.negativeQuery), String.class));
     final Expression havingPredicate = list.append("havingTest", Expressions.constant(solrImplementor.havingPredicate, String.class));
+    final Expression offset = list.append("offset", Expressions.constant(solrImplementor.offsetValue));
     Expression enumerable = list.append("enumerable", Expressions.call(table, SolrMethod.SOLR_QUERYABLE_QUERY.method,
-        fields, query, orders, buckets, metricPairs, limit, negativeQuery, havingPredicate));
+        fields, query, orders, buckets, metricPairs, limit, negativeQuery, havingPredicate, offset));
     Hook.QUERY_PLAN.run(query);
     list.add(Expressions.return_(null, enumerable));
     return implementor.result(physType, list.toBlock());
diff --git a/solr/core/src/test-files/log4j2.xml b/solr/core/src/test-files/log4j2.xml
index d2e2dfa..53dcae5 100644
--- a/solr/core/src/test-files/log4j2.xml
+++ b/solr/core/src/test-files/log4j2.xml
@@ -34,7 +34,7 @@
     <Logger name="org.apache.directory" level="WARN"/>
     <Logger name="org.apache.solr.hadoop" level="INFO"/>
     <Logger name="org.eclipse.jetty" level="INFO"/>
-    <Logger name="org.apache.calcite" level="DEBUG"/>
+    <Logger name="org.apache.calcite" level="INFO"/>
 
     <Root level="INFO">
       <AppenderRef ref="STDERR"/>
diff --git a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
index 60990c0..68defe7 100644
--- a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
+++ b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
@@ -17,8 +17,8 @@
 package org.apache.solr.handler;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
 
@@ -1854,21 +1854,18 @@ public class TestSQLHandler extends SolrCloudTestCase {
   }
 
   protected List<Tuple> getTuples(final SolrParams params, String baseUrl) throws IOException {
-    //log.info("Tuples from params: {}", params);
-    TupleStream tupleStream = new SolrStream(baseUrl, params);
-
-    tupleStream.open();
-    List<Tuple> tuples = new ArrayList<>();
-    for (; ; ) {
-      Tuple t = tupleStream.read();
-      //log.info(" ... {}", t.fields);
-      if (t.EOF) {
-        break;
-      } else {
-        tuples.add(t);
+    List<Tuple> tuples = new LinkedList<>();
+    try (TupleStream tupleStream = new SolrStream(baseUrl, params)) {
+      tupleStream.open();
+      for (; ; ) {
+        Tuple t = tupleStream.read();
+        if (t.EOF) {
+          break;
+        } else {
+          tuples.add(t);
+        }
       }
     }
-    tupleStream.close();
     return tuples;
   }
 
@@ -2122,4 +2119,63 @@ public class TestSQLHandler extends SolrCloudTestCase {
   private String max(String type) {
     return String.format(Locale.ROOT, "max(%s) as max_%s", type, type);
   }
+
+  @Test
+  public void testOffsetAndFetch() throws Exception {
+    new UpdateRequest()
+        .add("id", "01")
+        .add("id", "02")
+        .add("id", "03")
+        .add("id", "04")
+        .add("id", "05")
+        .add("id", "06")
+        .add("id", "07")
+        .add("id", "08")
+        .add("id", "09")
+        .add("id", "10")
+        .add("id", "11")
+        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
+
+    final int numDocs = 11;
+
+    List<Tuple> results = expectResults("SELECT id FROM $ALIAS ORDER BY id DESC OFFSET 0 FETCH NEXT 5 ROWS ONLY", 5);
+    assertEquals("11", results.get(0).getString("id"));
+    assertEquals("10", results.get(1).getString("id"));
+    assertEquals("09", results.get(2).getString("id"));
+    assertEquals("08", results.get(3).getString("id"));
+    assertEquals("07", results.get(4).getString("id"));
+
+    // no explicit offset, but defaults to 0 if using FETCH!
+    results = expectResults("SELECT id FROM $ALIAS ORDER BY id DESC FETCH NEXT 5 ROWS ONLY", 5);
+    assertEquals("11", results.get(0).getString("id"));
+    assertEquals("10", results.get(1).getString("id"));
+    assertEquals("09", results.get(2).getString("id"));
+    assertEquals("08", results.get(3).getString("id"));
+    assertEquals("07", results.get(4).getString("id"));
+
+    results = expectResults("SELECT id FROM $ALIAS ORDER BY id DESC OFFSET 5 FETCH NEXT 5 ROWS ONLY", 5);
+    assertEquals("06", results.get(0).getString("id"));
+    assertEquals("05", results.get(1).getString("id"));
+    assertEquals("04", results.get(2).getString("id"));
+    assertEquals("03", results.get(3).getString("id"));
+    assertEquals("02", results.get(4).getString("id"));
+
+    results = expectResults("SELECT id FROM $ALIAS ORDER BY id DESC OFFSET 10 FETCH NEXT 5 ROWS ONLY", 1);
+    assertEquals("01", results.get(0).getString("id"));
+
+    expectResults("SELECT id FROM $ALIAS ORDER BY id DESC LIMIT "+numDocs, numDocs);
+
+    for (int i=0; i < numDocs; i++) {
+      results = expectResults("SELECT id FROM $ALIAS ORDER BY id ASC OFFSET "+i+" FETCH NEXT 1 ROW ONLY", 1);
+      String id = results.get(0).getString("id");
+      if (id.startsWith("0")) id = id.substring(1);
+      assertEquals(i+1, Integer.parseInt(id));
+    }
+
+    // just past the end of the results
+    expectResults("SELECT id FROM $ALIAS ORDER BY id DESC OFFSET "+numDocs+" FETCH NEXT 5 ROWS ONLY", 0);
+
+    // Solr doesn't support OFFSET w/o LIMIT
+    expectThrows(IOException.class, () -> expectResults("SELECT id FROM $ALIAS ORDER BY id DESC OFFSET 5", 5));
+  }
 }
diff --git a/solr/solr-ref-guide/src/parallel-sql-interface.adoc b/solr/solr-ref-guide/src/parallel-sql-interface.adoc
index 3571221..9947835 100644
--- a/solr/solr-ref-guide/src/parallel-sql-interface.adoc
+++ b/solr/solr-ref-guide/src/parallel-sql-interface.adoc
@@ -275,6 +275,20 @@ If the `ORDER BY` clause contains the exact fields in the `GROUP BY` clause, the
 
 Order by fields are case sensitive.
 
+==== OFFSET with FETCH
+
+Queries that specify an `ORDER BY` clause may also use the `OFFSET` (0-based index) and `FETCH` operators to page through results; `OFFSET` without `FETCH` is not supported and generates an exception.
+For example, the following query requests the second page of 10 results:
+[source,sql]
+----
+ORDER BY ... OFFSET 10 FETCH NEXT 10 ROWS ONLY
+----
+Paging with SQL suffers the same performance penalty as paging in Solr queries using `start` and `rows`, where the distributed query must
+over-fetch `OFFSET` + `LIMIT` documents from each shard and then sort the results from each shard to generate the page of results returned to the client.
+Consequently, this feature should only be used for small OFFSET / FETCH sizes, such as paging up to 10,000 documents per shard. Solr SQL does not enforce any hard limits, but the deeper you page
+into the results, the longer each subsequent page request takes and the more resources it consumes. Solr's `cursorMark` feature for deep paging is not supported in SQL; use a SQL query without a `LIMIT` to stream large result sets through the `/export` handler instead.
+SQL `OFFSET` is not intended for deep-paging type use cases.
+
 === LIMIT Clause
 
 Limits the result set to the specified size. In the example above the clause `LIMIT 100` will limit the result set to 100 records.