You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by dl...@apache.org on 2020/07/02 19:27:05 UTC
[asterixdb] branch master updated: [NO ISSUE][EXT] Copy JSON object
data to buffer only when reading JSON object has started
This is an automated email from the ASF dual-hosted git repository.
dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 898d928 [NO ISSUE][EXT] Copy JSON object data to buffer only when reading JSON object has started
new 084bf5e Merge branch 'gerrit/mad-hatter'
898d928 is described below
commit 898d928a5f9f7cdf326b976e97e074c570f2b5c1
Author: Ali Alsuliman <al...@gmail.com>
AuthorDate: Sun Jun 28 17:47:24 2020 -0700
[NO ISSUE][EXT] Copy JSON object data to buffer only when reading JSON object has started
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Copy JSON object data to the buffer only once reading of a JSON object has started, since the JSON
object could be a nested object (i.e., a leading comma character and spaces should not be copied over).
Change-Id: Iddac6d5c3926367770ffd31714c54361d99b4268
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7043
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hu...@gmail.com>
Reviewed-by: Ali Alsuliman <al...@gmail.com>
---
.../data/json/single-line/array_of_objects.json | 1 +
.../external_dataset/aws/AwsS3ExternalDatasetTest.java | 18 ++++++++++++++----
.../aws/s3/json/json/external_dataset.000.ddl.sqlpp | 11 +++++++++++
....007.ddl.sqlpp => external_dataset.007.query.sqlpp} | 4 +++-
...et.007.ddl.sqlpp => external_dataset.099.ddl.sqlpp} | 0
.../aws/s3/json/json/external_dataset.007.adm | 1 +
.../reader/stream/SemiStructuredRecordReader.java | 18 +++++++++---------
7 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/asterixdb/asterix-app/data/json/single-line/array_of_objects.json b/asterixdb/asterix-app/data/json/single-line/array_of_objects.json
new file mode 100644
index 0000000..5ea91f9
--- /dev/null
+++ b/asterixdb/asterix-app/data/json/single-line/array_of_objects.json
@@ -0,0 +1 @@
+[{"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": nu [...]
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 8ec311d..502a7eb 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -262,6 +262,9 @@ public class AwsS3ExternalDatasetTest {
loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
false);
+ definitionSegment = "json-array-of-objects";
+ loadData(dataBasePath, "single-line", "array_of_objects.json", "json-data/", definitionSegment, false, false);
+
// gz compressed format
definitionSegment = "gz";
loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
@@ -330,6 +333,11 @@ public class AwsS3ExternalDatasetTest {
private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
String definitionSegment, boolean removeExtension) {
+ loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
+ }
+
+ private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
+ String definitionSegment, boolean removeExtension, boolean copyToSubLevels) {
// Files data
Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
RequestBody requestBody = RequestBody.fromFile(filePath);
@@ -350,10 +358,12 @@ public class AwsS3ExternalDatasetTest {
// Load the data
client.putObject(builder.key(basePath + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
+ if (copyToSubLevels) {
+ client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
+ client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
+ client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
+ client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
+ }
}
private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp
index c3e0d20..87a2cef 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp
@@ -67,4 +67,15 @@ create external dataset test4(test) using S3 (
("container"="playground"),
("definition"="json-data/reviews/multi-lines-with-nested-objects/json"),
("format"="json")
+);
+
+drop dataset test5 if exists;
+create external dataset test5(test) using S3 (
+("accessKeyId"="dummyAccessKey"),
+("secretAccessKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="json-data/single-line/json-array-of-objects"),
+("format"="json")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.query.sqlpp
similarity index 94%
copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.query.sqlpp
index 548e632..cdde056 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.query.sqlpp
@@ -17,4 +17,6 @@
* under the License.
*/
-drop dataverse test if exists;
\ No newline at end of file
+use test;
+
+select value count(*) from test5;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.099.ddl.sqlpp
similarity index 100%
rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp
rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.099.ddl.sqlpp
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.007.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.007.adm
new file mode 100644
index 0000000..86babba
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.007.adm
@@ -0,0 +1 @@
+50128
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 5f8d923..2ff5cfa 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -193,16 +193,16 @@ public class SemiStructuredRecordReader extends StreamRecordReader {
}
isLastCharCR = c == CR;
}
- }
- int appendLength = bufferPosn - startPosn;
- if (appendLength > 0) {
- try {
- record.append(inputBuffer, startPosn, appendLength);
- } catch (RuntimeDataException e) {
- reader.reset();
- bufferPosn = bufferLength = 0;
- throw e;
+ int appendLength = bufferPosn - startPosn;
+ if (appendLength > 0) {
+ try {
+ record.append(inputBuffer, startPosn, appendLength);
+ } catch (RuntimeDataException e) {
+ reader.reset();
+ bufferPosn = bufferLength = 0;
+ throw e;
+ }
}
}
} while (!hasFinished);