You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by dl...@apache.org on 2020/07/02 19:27:05 UTC

[asterixdb] branch master updated: [NO ISSUE][EXT] Copy JSON object data to buffer only when reading JSON object has started

This is an automated email from the ASF dual-hosted git repository.

dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 898d928  [NO ISSUE][EXT] Copy JSON object data to buffer only when reading JSON object has started
     new 084bf5e  Merge branch 'gerrit/mad-hatter'
898d928 is described below

commit 898d928a5f9f7cdf326b976e97e074c570f2b5c1
Author: Ali Alsuliman <al...@gmail.com>
AuthorDate: Sun Jun 28 17:47:24 2020 -0700

    [NO ISSUE][EXT] Copy JSON object data to buffer only when reading JSON object has started
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Copy JSON object data to the buffer only once reading of the JSON object has started, since the JSON
    object could be a nested object (i.e., the leading comma character and spaces should not be copied over).
    
    Change-Id: Iddac6d5c3926367770ffd31714c54361d99b4268
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7043
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Hussain Towaileb <hu...@gmail.com>
    Reviewed-by: Ali Alsuliman <al...@gmail.com>
---
 .../data/json/single-line/array_of_objects.json        |  1 +
 .../external_dataset/aws/AwsS3ExternalDatasetTest.java | 18 ++++++++++++++----
 .../aws/s3/json/json/external_dataset.000.ddl.sqlpp    | 11 +++++++++++
 ....007.ddl.sqlpp => external_dataset.007.query.sqlpp} |  4 +++-
 ...et.007.ddl.sqlpp => external_dataset.099.ddl.sqlpp} |  0
 .../aws/s3/json/json/external_dataset.007.adm          |  1 +
 .../reader/stream/SemiStructuredRecordReader.java      | 18 +++++++++---------
 7 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/asterixdb/asterix-app/data/json/single-line/array_of_objects.json b/asterixdb/asterix-app/data/json/single-line/array_of_objects.json
new file mode 100644
index 0000000..5ea91f9
--- /dev/null
+++ b/asterixdb/asterix-app/data/json/single-line/array_of_objects.json
@@ -0,0 +1 @@
+[{"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": null, "missing": 0}, {"mutated": 0, "filename": "file_0.json", "folder": "", "null": nu [...]
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 8ec311d..502a7eb 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -262,6 +262,9 @@ public class AwsS3ExternalDatasetTest {
         loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
                 false);
 
+        definitionSegment = "json-array-of-objects";
+        loadData(dataBasePath, "single-line", "array_of_objects.json", "json-data/", definitionSegment, false, false);
+
         // gz compressed format
         definitionSegment = "gz";
         loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
@@ -330,6 +333,11 @@ public class AwsS3ExternalDatasetTest {
 
     private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
             String definitionSegment, boolean removeExtension) {
+        loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
+    }
+
+    private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
+            String definitionSegment, boolean removeExtension, boolean copyToSubLevels) {
         // Files data
         Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
         RequestBody requestBody = RequestBody.fromFile(filePath);
@@ -350,10 +358,12 @@ public class AwsS3ExternalDatasetTest {
 
         // Load the data
         client.putObject(builder.key(basePath + finalFileName).build(), requestBody);
-        client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
-        client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
-        client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
-        client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
+        if (copyToSubLevels) {
+            client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
+            client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
+            client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
+            client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
+        }
     }
 
     private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp
index c3e0d20..87a2cef 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.000.ddl.sqlpp
@@ -67,4 +67,15 @@ create external dataset test4(test) using S3 (
 ("container"="playground"),
 ("definition"="json-data/reviews/multi-lines-with-nested-objects/json"),
 ("format"="json")
+);
+
+drop dataset test5 if exists;
+create external dataset test5(test) using S3 (
+("accessKeyId"="dummyAccessKey"),
+("secretAccessKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="json-data/single-line/json-array-of-objects"),
+("format"="json")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.query.sqlpp
similarity index 94%
copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp
copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.query.sqlpp
index 548e632..cdde056 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.query.sqlpp
@@ -17,4 +17,6 @@
  * under the License.
  */
 
-drop dataverse test if exists;
\ No newline at end of file
+use test;
+
+select value count(*) from test5;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.099.ddl.sqlpp
similarity index 100%
rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.007.ddl.sqlpp
rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/json/json/external_dataset.099.ddl.sqlpp
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.007.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.007.adm
new file mode 100644
index 0000000..86babba
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/json/json/external_dataset.007.adm
@@ -0,0 +1 @@
+50128
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 5f8d923..2ff5cfa 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -193,16 +193,16 @@ public class SemiStructuredRecordReader extends StreamRecordReader {
                     }
                     isLastCharCR = c == CR;
                 }
-            }
 
-            int appendLength = bufferPosn - startPosn;
-            if (appendLength > 0) {
-                try {
-                    record.append(inputBuffer, startPosn, appendLength);
-                } catch (RuntimeDataException e) {
-                    reader.reset();
-                    bufferPosn = bufferLength = 0;
-                    throw e;
+                int appendLength = bufferPosn - startPosn;
+                if (appendLength > 0) {
+                    try {
+                        record.append(inputBuffer, startPosn, appendLength);
+                    } catch (RuntimeDataException e) {
+                        reader.reset();
+                        bufferPosn = bufferLength = 0;
+                        throw e;
+                    }
                 }
             }
         } while (!hasFinished);