You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wy...@apache.org on 2021/08/21 16:39:31 UTC

[asterixdb] branch master updated: [ASTERIXDB-2918][EXT] Validate the type when creating Parquet external dataset

This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 1bddb40  [ASTERIXDB-2918][EXT] Validate the type when creating Parquet external dataset
1bddb40 is described below

commit 1bddb400be3ee2aeb589a0221f415c4a802ff3e5
Author: Wail Alkowaileet <wa...@gmail.com>
AuthorDate: Fri Aug 20 10:23:43 2021 -0700

    [ASTERIXDB-2918][EXT] Validate the type when creating Parquet external dataset
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Ensure that the type used when creating an external dataset in
    Parquet format does not contain declared fields.
    
    Change-Id: I4870a91ecf41b41996b862704b767e04abc14569
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12905
    Reviewed-by: Hussain Towaileb <hu...@gmail.com>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
---
 .../asterix/app/translator/QueryTranslator.java    |  4 ++-
 .../parquet/invalid-type/invalid-type.1.ddl.sqlpp  | 42 ++++++++++++++++++++++
 .../runtimets/testsuite_external_dataset_s3.xml    |  7 ++++
 .../asterix/common/exceptions/ErrorCode.java       |  1 +
 .../src/main/resources/asx_errormsg/en.properties  |  1 +
 .../asterix/external/util/ExternalDataUtils.java   | 17 +++++++++
 6 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 3e0cf79..bf6e3b5 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -967,7 +967,9 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen
             Datatype itemType, MetadataProvider metadataProvider, MetadataTransactionContext mdTxnCtx)
             throws AlgebricksException {
         ExternalDetailsDecl externalDetails = (ExternalDetailsDecl) dd.getDatasetDetailsDecl();
-        return externalDetails.getProperties();
+        Map<String, String> properties = externalDetails.getProperties();
+        ExternalDataUtils.validateType(properties, (ARecordType) itemType.getDatatype());
+        return properties;
     }
 
     protected static void validateIfResourceIsActiveInFeed(ICcApplicationContext appCtx, Dataset dataset,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp
new file mode 100644
index 0000000..ad6cd0e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Test type validation for Parquet
+* Expected Res : ASX1161: Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format
+* Date         : August 19th 2021
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE ParquetType as {
+  id: string,
+  text: string
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+  %template%,
+  ("container"="playground"),
+  ("definition"="parquet-data/reviews"),
+  ("include"="*id_age.parquet"),
+  ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 41c769d..c461722 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -129,6 +129,13 @@
         <output-dir compare="Text">parquet-temporary-access</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/parquet/invalid-type">
+        <placeholder name="adapter" value="S3" />
+        <output-dir compare="Text">none</output-dir>
+        <expected-error>ASX1161: Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format</expected-error>
+      </compilation-unit>
+    </test-case>
     <!-- Parquet Tests End -->
     <test-case FilePath="external-dataset">
       <compilation-unit name="common/empty-string-definition">
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 9cfd6ea..79d663e 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -245,6 +245,7 @@ public enum ErrorCode implements IError {
     COMPILATION_BAD_VIEW_DEFINITION(1158),
     UNKNOWN_VIEW(1159),
     VIEW_EXISTS(1160),
+    UNSUPPORTED_TYPE_FOR_PARQUET(1161),
 
     // Feed errors
     DATAFLOW_ILLEGAL_STATE(3001),
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 5f08844..159e0ef 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -247,6 +247,7 @@
 1158 = Error compiling view %1$s. %2$s
 1159 = Cannot find view with name %1$s
 1160 = A view with this name %1$s already exists
+1161 = Type '%1$s' contains declared fields, which is not supported for 'parquet' format
 
 # Feed Errors
 3001 = Illegal state.
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 36ee203..112e4ee 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -771,6 +771,23 @@ public class ExternalDataUtils {
 
     public static boolean supportsPushdown(Map<String, String> properties) {
         //Currently, only Apache Parquet format is supported
+        return isParquetFormat(properties);
+    }
+
+    /**
+     * Validate that the dataset's declared type is supported by the configured external data format
+     *
+     * @param properties        external dataset configuration
+     * @param datasetRecordType dataset declared type
+     */
+    public static void validateType(Map<String, String> properties, ARecordType datasetRecordType)
+            throws CompilationException {
+        if (isParquetFormat(properties) && datasetRecordType.getFieldTypes().length != 0) {
+            throw new CompilationException(ErrorCode.UNSUPPORTED_TYPE_FOR_PARQUET, datasetRecordType.getTypeName());
+        }
+    }
+
+    private static boolean isParquetFormat(Map<String, String> properties) {
         String inputFormat = properties.get(ExternalDataConstants.KEY_INPUT_FORMAT);
         return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(inputFormat)
                 || ExternalDataConstants.INPUT_FORMAT_PARQUET.equals(inputFormat)