You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/09/28 18:20:10 UTC
[drill] branch master updated: DRILL-8320: Prevent Infinite Pagination for Index Paginator (#2660)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 9551372cd5 DRILL-8320: Prevent Infinite Pagination for Index Paginator (#2660)
9551372cd5 is described below
commit 9551372cd5678ff4ab7d40425b51ae485e202881
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Wed Sep 28 14:20:02 2022 -0400
DRILL-8320: Prevent Infinite Pagination for Index Paginator (#2660)
* DRILL-8320: Prevent Infinite Pagination for Index Paginator
---
contrib/storage-http/.gitignore | 1 +
.../drill/exec/store/http/HttpBatchReader.java | 28 ++++++++++-
.../drill/exec/store/http/HttpPaginatorConfig.java | 7 +--
.../exec/store/http/HttpScanBatchCreator.java | 6 +--
.../exec/store/http/paginator/IndexPaginator.java | 22 +++++++--
.../drill/exec/store/http/TestPagination.java | 54 +++++++++++++++++++++-
.../resources/data/nested_pagination_fields.json | 17 +++++++
.../resources/data/nested_pagination_fields2.json | 14 ++++++
.../easy/json/parser/JsonStructureParser.java | 1 -
9 files changed, 131 insertions(+), 19 deletions(-)
diff --git a/contrib/storage-http/.gitignore b/contrib/storage-http/.gitignore
new file mode 100644
index 0000000000..710368b187
--- /dev/null
+++ b/contrib/storage-http/.gitignore
@@ -0,0 +1 @@
+./src/test/resources/logback-test.xml
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
index e2bfee9114..6f312b0e9b 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
@@ -111,6 +111,8 @@ public class HttpBatchReader implements ManagedReader<SchemaNegotiator> {
};
negotiator.setErrorContext(errorContext);
+ logger.debug("Executing request with url: {}", url);
+
// Http client setup
SimpleHttp http = SimpleHttp.builder()
.scanDefn(subScan)
@@ -359,13 +361,32 @@ public class HttpBatchReader implements ManagedReader<SchemaNegotiator> {
if (StringUtils.isNotEmpty(indexPaginator.getNextPageParam())) {
indexPaginator.setNextPageValue(paginationFields.get(indexPaginator.getNextPageParam()).toString());
}
+ } else {
+ // This covers the case of keyset/index pagination where there isn't a boolean parameter to indicate whether there are more results.
+ // In this case, we will interpret the absence of the pagination field, receiving the same value, or a null value as the marker to stop pagination.
+ if ( (!paginationFields.containsKey(indexPaginator.getIndexParam())) ||
+ paginationFields.get(indexPaginator.getIndexParam()) == null
+ ) {
+ // End pagination
+ paginator.notifyPartialPage();
+ } else {
+ // Otherwise, check to see if the field is present but empty, or contains the value from the last page.
+ // This will prevent runaway pagination calls.
+ String indexParameter = paginationFields.get(indexPaginator.getIndexParam()).toString();
+ // Empty value or the last value is the same as the current one.
+ if (StringUtils.isEmpty(indexParameter) || (StringUtils.equals(indexParameter, indexPaginator.getLastIndexValue()))) {
+ paginator.notifyPartialPage();
+ } else {
+ // Whew! We made it... get the next page.
+ indexPaginator.setIndexValue(indexParameter);
+ }
+ }
}
}
@Override
public boolean next() {
boolean result = jsonLoader.readBatch();
-
// This code implements the index/keyset pagination. This pagination method
// uses a value returned in the current result set as the starting point for the
// next page. Some APIs will have a boolean parameter to indicate that there are
@@ -373,9 +394,12 @@ public class HttpBatchReader implements ManagedReader<SchemaNegotiator> {
// it is necessary to grab these values from the returned data.
if (paginator != null && paginator.getMode() == PaginatorMethod.INDEX) {
// First check to see if the limit has been reached. If so, mark the end of pagination.
- if (maxRecords > 0 && (resultSetLoader.totalRowCount() > maxRecords)) {
+ long totalRowCount = resultSetLoader.totalRowCount();
+ if (maxRecords > 0 && totalRowCount >= maxRecords) {
// End Pagination
paginator.notifyPartialPage();
+ // Returning false here because if there is a partial page, we want the reader to stop and no further batches to be created.
+ return false;
} else {
populateIndexPaginator();
}
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpPaginatorConfig.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpPaginatorConfig.java
index ad474f9bac..e6b226f8c6 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpPaginatorConfig.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpPaginatorConfig.java
@@ -120,12 +120,7 @@ public class HttpPaginatorConfig {
break;
case INDEX:
// Either the nextPageParam OR the indexParam must be populated
- if (StringUtils.isEmpty(this.hasMoreParam)) {
- throw UserException
- .validationError()
- .message("Invalid paginator configuration. For INDEX pagination, the hasMoreParam must be defined.")
- .build(logger);
- } else if ((StringUtils.isEmpty(this.nextPageParam) && StringUtils.isNotEmpty(this.indexParam)) &&
+ if ((StringUtils.isEmpty(this.nextPageParam) && StringUtils.isNotEmpty(this.indexParam)) &&
(StringUtils.isNotEmpty(this.nextPageParam) && StringUtils.isEmpty(this.indexParam))) {
throw UserException
.validationError()
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
index edc7dee1f9..3de783f7da 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpScanBatchCreator.java
@@ -86,9 +86,6 @@ public class HttpScanBatchCreator implements BatchCreator<HttpSubScan> {
ReaderFactory readerFactory = new HttpReaderFactory(subScan);
builder.setReaderFactory(readerFactory);
builder.nullType(Types.optional(MinorType.VARCHAR));
-
- // TODO Add page size limit here to ScanFramework Builder
-
return builder;
}
@@ -139,8 +136,9 @@ public class HttpScanBatchCreator implements BatchCreator<HttpSubScan> {
paginatorConfig.pageSizeParam());
} else if (paginatorConfig.getMethodType() == PaginatorMethod.INDEX) {
paginator = new IndexPaginator(urlBuilder,
+ 0, // Page size not used for Index/Keyset pagination
subScan.maxRecords(),
- 0, paginatorConfig.hasMoreParam(),
+ paginatorConfig.hasMoreParam(),
paginatorConfig.indexParam(),
paginatorConfig.nextPageParam());
}
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java
index 22aa531f54..67bc89c4a6 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java
@@ -20,19 +20,25 @@ package org.apache.drill.exec.store.http.paginator;
import okhttp3.HttpUrl.Builder;
import org.apache.commons.lang3.StringUtils;
+import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.store.http.HttpPaginatorConfig.PaginatorMethod;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
import java.util.NoSuchElementException;
public class IndexPaginator extends Paginator {
+ private static final Logger logger = LoggerFactory.getLogger(IndexPaginator.class);
private final String hasMoreParam;
private final String indexParam;
private final String nextPageParam;
private String indexValue;
+ private String lastIndexValue;
private Boolean hasMoreValue;
private String nextPageValue;
private int pageCount;
@@ -75,9 +81,14 @@ public class IndexPaginator extends Paginator {
}
public void setNextPageValue(String nextPageValue) {
+ this.lastIndexValue = this.nextPageValue;
this.nextPageValue = nextPageValue;
}
+ public String getLastIndexValue() {
+ return lastIndexValue;
+ }
+
public boolean isFirstPage() {
return pageCount < 1;
}
@@ -94,13 +105,14 @@ public class IndexPaginator extends Paginator {
throw new NoSuchElementException();
}
- if (StringUtils.isNotEmpty(nextPageValue)) {
- // TODO figure this out...
- } else if (StringUtils.isNotEmpty(indexValue)) {
+ if (StringUtils.isNotEmpty(indexValue)) {
try {
- indexValue = URLEncoder.encode(indexValue, "UTF-8");
+ indexValue = URLEncoder.encode(indexValue, StandardCharsets.UTF_8.name());
} catch (UnsupportedEncodingException e) {
- // Do nothing.
+ // Should never happen
+ throw UserException.internalError()
+ .message(e.getMessage())
+ .build(logger);
}
builder.removeAllEncodedQueryParameters(indexParam);
builder.addQueryParameter(indexParam, indexValue);
diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java
index 328a447cb4..d04f230f3d 100644
--- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java
+++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java
@@ -65,7 +65,8 @@ public class TestPagination extends ClusterTest {
private static String TEST_JSON_INDEX_PAGE2;
private static String TEST_JSON_INDEX_PAGE3;
private static String TEST_JSON_INDEX_PAGE4;
-
+ private static String TEST_JSON_NESTED_INDEX;
+ private static String TEST_JSON_NESTED_INDEX2;
private static String TEST_XML_PAGE1;
private static String TEST_XML_PAGE2;
private static String TEST_XML_PAGE3;
@@ -89,6 +90,9 @@ public class TestPagination extends ClusterTest {
TEST_JSON_INDEX_PAGE3 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response3.json"), Charsets.UTF_8).read();
TEST_JSON_INDEX_PAGE4 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response4.json"), Charsets.UTF_8).read();
+ TEST_JSON_NESTED_INDEX = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/nested_pagination_fields.json"), Charsets.UTF_8).read();
+ TEST_JSON_NESTED_INDEX2 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/nested_pagination_fields2.json"), Charsets.UTF_8).read();
+
TEST_XML_PAGE1 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response_1.xml"), Charsets.UTF_8).read();
TEST_XML_PAGE2 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response_2.xml"), Charsets.UTF_8).read();
TEST_XML_PAGE3 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response_3.xml"), Charsets.UTF_8).read();
@@ -167,6 +171,31 @@ public class TestPagination extends ClusterTest {
.inputType("json")
.build();
+ HttpPaginatorConfig nestedIndexPaginator = HttpPaginatorConfig.builder()
+ .indexParam("after")
+ .method("index")
+ .build();
+
+ HttpApiConfig mockJsonConfigWitNestedKeyset = HttpApiConfig.builder()
+ .url("http://localhost:8092/json")
+ .method("get")
+ .headers(headers)
+ .requireTail(false)
+ .paginator(nestedIndexPaginator)
+ .inputType("json")
+ .build();
+
+ HttpApiConfig mockJsonConfigWitNestedKeysetAndDataPath = HttpApiConfig.builder()
+ .url("http://localhost:8092/json")
+ .method("get")
+ .headers(headers)
+ .dataPath("results")
+ .requireTail(false)
+ .paginator(nestedIndexPaginator)
+ .inputType("json")
+ .build();
+
+
HttpApiConfig mockJsonConfigWithKeysetAndDataPath = HttpApiConfig.builder()
.url("http://localhost:8092/json")
.method("get")
@@ -232,6 +261,8 @@ public class TestPagination extends ClusterTest {
configs.put("csv_paginator", mockCsvConfigWithPaginator);
configs.put("json_index", mockJsonConfigWithKeyset);
configs.put("json_index_datapath", mockJsonConfigWithKeysetAndDataPath);
+ configs.put("nested_keyset", mockJsonConfigWitNestedKeyset);
+ configs.put("nested_keyset_and_datapath", mockJsonConfigWitNestedKeysetAndDataPath);
configs.put("json_paginator", mockJsonConfigWithPaginator);
configs.put("xml_paginator", mockXmlConfigWithPaginator);
configs.put("xml_paginator_url_params", mockXmlConfigWithPaginatorAndUrlParams);
@@ -348,6 +379,27 @@ public class TestPagination extends ClusterTest {
assertEquals(4, count);
}
}
+ @Test
+ public void jsonQueryWithoutHasMore() throws Exception {
+ String sql = "SELECT * FROM `local`.`nested_keyset` LIMIT 4";
+ try (MockWebServer server = startServer()) {
+
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_NESTED_INDEX));
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_NESTED_INDEX2));
+
+ List<QueryDataBatch> results = client.queryBuilder()
+ .sql(sql)
+ .results();
+
+ int count = 0;
+ for(QueryDataBatch b : results){
+ count += b.getHeader().getRowCount();
+ b.release();
+ }
+ assertEquals(2, results.size());
+ assertEquals(2, count);
+ }
+ }
@Test
public void simpleJSONPaginatorQueryWithoutLimit() throws Exception {
diff --git a/contrib/storage-http/src/test/resources/data/nested_pagination_fields.json b/contrib/storage-http/src/test/resources/data/nested_pagination_fields.json
new file mode 100644
index 0000000000..3ad3845cd4
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/nested_pagination_fields.json
@@ -0,0 +1,17 @@
+{
+ "results": [
+ {
+ "id": "1",
+ "properties": {
+ "company": "HubSpot"
+ },
+ "archived": false
+ }
+ ],
+ "paging": {
+ "next": {
+ "after": "2",
+ "link": "https://api.hubapi.com/crm/v3/objects/contacts?includeAssociations=true&properties=record_id&properties=company&properties=createdate&properties=email&properties=firstname&properties=lastmodifieddate&properties=lastname&properties=phone&properties=website&limit=1&after=2"
+ }
+ }
+}
diff --git a/contrib/storage-http/src/test/resources/data/nested_pagination_fields2.json b/contrib/storage-http/src/test/resources/data/nested_pagination_fields2.json
new file mode 100644
index 0000000000..68f68d2f73
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/nested_pagination_fields2.json
@@ -0,0 +1,14 @@
+{
+ "results": [
+ {
+ "id": "2",
+ "properties": {
+ "company": "HubSpot2"
+ },
+ "archived": false
+ }
+ ],
+ "paging": {
+
+ }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java
index 03fa51b695..4d33403070 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/parser/JsonStructureParser.java
@@ -264,7 +264,6 @@ public class JsonStructureParser {
}
while (true) {
try {
- // System.out.println(tokenizer.stringValue());
return rootState.parseRoot(tokenizer);
} catch (RecoverableJsonException e) {
if (! recover()) {