You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2023/06/14 01:18:51 UTC

[asterixdb] 14/14: Merge branch 'gerrit/neo' into 'master'

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 7952d127854a2ef4750df897f39a957257f88a9f
Merge: 9f38f8df67 fc90675063
Author: Michael Blow <mi...@couchbase.com>
AuthorDate: Tue Jun 13 19:11:15 2023 -0400

    Merge branch 'gerrit/neo' into 'master'
    
    Change-Id: I76febe15393ba442f007d36655cd68e2a6a75238

 .../non-pure-function.00.ddl.sqlpp                 |  30 ++++
 .../non-pure-function.01.update.sqlpp              |  24 +++
 .../non-pure-function.02.query.sqlpp               |  26 +++
 .../non-pure-function.03.query.sqlpp               |  27 ++++
 .../non-pure-function.04.query.sqlpp               |  28 ++++
 .../non-pure-function.05.query.sqlpp               |  27 ++++
 .../non-pure-function.06.query.sqlpp               |  28 ++++
 .../non-pure-function/non-pure-function.02.adm     |   1 +
 .../non-pure-function/non-pure-function.03.adm     |   1 +
 .../non-pure-function/non-pure-function.04.adm     |   1 +
 .../non-pure-function/non-pure-function.05.adm     |   1 +
 .../non-pure-function/non-pure-function.06.adm     |   1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |   5 +
 asterixdb/asterix-external-data/pom.xml            |  10 ++
 .../asterix/external/util/google/gcs/GCSUtils.java |   8 +-
 asterixdb/asterix-server/pom.xml                   |  87 ++++------
 asterixdb/pom.xml                                  |  22 ++-
 .../appended-resources/supplemental-models.xml     | 180 ++++++++++++---------
 ...bc8e88873f0c1e42723640536866d3df_COPYRIGHT.txt} |   0
 ...bc7348d6be23d2a9daaacd6b8424b8c1_COPYRIGHT.txt} |   4 +-
 ...7db1c3a9d42f1ae39fb35cfbb54a8742_COPYRIGHT.txt} |   4 +-
 ...5b7e93858a39ad4a46ba4d0e17a0aefe_COPYRIGHT.txt} |   4 +-
 ...-sdk-for-java_azure-identity_1.9.0_LICENSE.txt} |   0
 ...e-sdk-for-java_azure-identity_1.9.0_NOTICE.txt} | 141 +++++++++++++---
 ...or-java_azure-storage-blob_12.22.0_LICENSE.txt} |   0
 ...for-java_azure-storage-blob_12.22.0_NOTICE.txt} |  45 ++++--
 .../rules/InlineAssignIntoAggregateRule.java       |  31 ++--
 .../hyracks/control/common/config/OptionTypes.java |   4 +
 hyracks-fullstack/hyracks/hyracks-hdfs/pom.xml     |   8 +-
 hyracks-fullstack/pom.xml                          |  15 ++
 30 files changed, 571 insertions(+), 192 deletions(-)

diff --cc asterixdb/asterix-external-data/pom.xml
index 466bbb11f5,0698b8a20f..253754efb0
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@@ -550,16 -551,11 +555,21 @@@
        <groupId>org.eclipse.jetty</groupId>
        <artifactId>jetty-util-ajax</artifactId>
      </dependency>
+     <!-- Manually included to avoid CVE-2023-1370 -->
+     <dependency>
+       <groupId>net.minidev</groupId>
+       <artifactId>json-smart</artifactId>
+     </dependency>
 +    <dependency>
 +      <groupId>org.apache.iceberg</groupId>
 +      <artifactId>iceberg-core</artifactId>
 +      <version>1.1.0</version>
 +    </dependency>
 +    <dependency>
 +      <groupId>org.apache.avro</groupId>
 +      <artifactId>avro</artifactId>
 +      <version>1.11.1</version>
 +    </dependency>
    </dependencies>
    <!-- apply patch for HADOOP-17225 to workaround CVE-2019-10172 -->
    <repositories>
diff --cc asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
index 3efb041dae,0000000000..6183a88143
mode 100644,000000..100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
@@@ -1,242 -1,0 +1,242 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *   http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.asterix.external.util.google.gcs;
 +
 +import static org.apache.asterix.common.exceptions.ErrorCode.EXTERNAL_SOURCE_ERROR;
 +import static org.apache.asterix.common.exceptions.ErrorCode.INVALID_PARAM_VALUE_ALLOWED_VALUE;
 +import static org.apache.asterix.common.exceptions.ErrorCode.PARAM_NOT_ALLOWED_IF_PARAM_IS_PRESENT;
 +import static org.apache.asterix.external.util.ExternalDataUtils.getPrefix;
 +import static org.apache.asterix.external.util.ExternalDataUtils.validateIncludeExclude;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.ENDPOINT_FIELD_NAME;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_TYPE;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_UNAUTHENTICATED;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_ENDPOINT;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_GCS_PROTOCOL;
 +import static org.apache.asterix.external.util.google.gcs.GCSConstants.JSON_CREDENTIALS_FIELD_NAME;
 +import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
 +
 +import java.io.ByteArrayInputStream;
 +import java.io.IOException;
 +import java.io.InputStream;
 +import java.nio.file.Path;
 +import java.util.ArrayList;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.function.BiPredicate;
 +import java.util.regex.Matcher;
 +
 +import org.apache.asterix.common.exceptions.CompilationException;
 +import org.apache.asterix.common.exceptions.ErrorCode;
 +import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher;
 +import org.apache.asterix.external.util.ExternalDataConstants;
 +import org.apache.asterix.external.util.ExternalDataUtils;
 +import org.apache.asterix.external.util.HDFSUtils;
 +import org.apache.hadoop.mapred.JobConf;
 +import org.apache.hyracks.api.exceptions.IWarningCollector;
 +import org.apache.hyracks.api.exceptions.SourceLocation;
 +import org.apache.hyracks.api.exceptions.Warning;
 +
 +import com.google.api.gax.paging.Page;
 +import com.google.auth.oauth2.GoogleCredentials;
 +import com.google.cloud.BaseServiceException;
++import com.google.cloud.NoCredentials;
 +import com.google.cloud.storage.Blob;
 +import com.google.cloud.storage.Storage;
 +import com.google.cloud.storage.StorageOptions;
 +
 +public class GCSUtils {
 +    private GCSUtils() {
 +        throw new AssertionError("do not instantiate");
 +
 +    }
 +
 +    /**
 +     * Builds the client using the provided configuration
 +     *
 +     * @param configuration properties
 +     * @return the storage client built from the provided configuration
 +     * @throws CompilationException if the authentication parameters are invalid or the credentials cannot be loaded
 +     */
 +    public static Storage buildClient(Map<String, String> configuration) throws CompilationException {
 +        String applicationDefaultCredentials = configuration.get(APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME);
 +        String jsonCredentials = configuration.get(JSON_CREDENTIALS_FIELD_NAME);
 +        String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
 +
 +        StorageOptions.Builder builder = StorageOptions.newBuilder();
 +
 +        // default credentials provider
 +        if (applicationDefaultCredentials != null) {
 +            // only "true" value is allowed
 +            if (!applicationDefaultCredentials.equalsIgnoreCase("true")) {
 +                throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE,
 +                        APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME, "true");
 +            }
 +
 +            // no other authentication parameters are allowed
 +            if (jsonCredentials != null) {
 +                throw new CompilationException(PARAM_NOT_ALLOWED_IF_PARAM_IS_PRESENT, JSON_CREDENTIALS_FIELD_NAME,
 +                        APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME);
 +            }
 +
 +            try {
 +                builder.setCredentials(GoogleCredentials.getApplicationDefault());
 +            } catch (IOException ex) {
 +                throw CompilationException.create(EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
 +            }
-         }
- 
-         // json credentials
-         if (jsonCredentials != null) {
++        } else if (jsonCredentials != null) {
 +            try (InputStream credentialsStream = new ByteArrayInputStream(jsonCredentials.getBytes())) {
 +                builder.setCredentials(GoogleCredentials.fromStream(credentialsStream));
 +            } catch (IOException ex) {
 +                throw new CompilationException(EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
 +            }
++        } else {
++            builder.setCredentials(NoCredentials.getInstance());
 +        }
 +
 +        if (endpoint != null) {
 +            builder.setHost(endpoint);
 +        }
 +
 +        return builder.build().getService();
 +    }
 +
 +    /**
 +     * Validate external dataset properties
 +     *
 +     * @param configuration properties
 +     * @throws CompilationException Compilation exception
 +     */
 +    public static void validateProperties(Map<String, String> configuration, SourceLocation srcLoc,
 +            IWarningCollector collector) throws CompilationException {
 +
 +        // check if the format property is present
 +        if (configuration.get(ExternalDataConstants.KEY_FORMAT) == null) {
 +            throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED, srcLoc, ExternalDataConstants.KEY_FORMAT);
 +        }
 +
 +        validateIncludeExclude(configuration);
 +        String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
 +
 +        try {
 +            Storage.BlobListOption limitOption = Storage.BlobListOption.pageSize(1);
 +            Storage.BlobListOption prefixOption = Storage.BlobListOption.prefix(getPrefix(configuration));
 +            Storage storage = buildClient(configuration);
 +            Page<Blob> items = storage.list(container, limitOption, prefixOption);
 +
 +            if (!items.iterateAll().iterator().hasNext() && collector.shouldWarn()) {
 +                Warning warning = Warning.of(srcLoc, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES);
 +                collector.warn(warning);
 +            }
 +        } catch (CompilationException ex) {
 +            throw ex;
 +        } catch (Exception ex) {
 +            throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
 +        }
 +    }
 +
 +    public static List<Blob> listItems(Map<String, String> configuration, IncludeExcludeMatcher includeExcludeMatcher,
 +            IWarningCollector warningCollector) throws CompilationException {
 +        // Prepare to retrieve the objects
 +        List<Blob> filesOnly = new ArrayList<>();
 +        String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
 +        Storage gcs = buildClient(configuration);
 +        Storage.BlobListOption options = Storage.BlobListOption.prefix(ExternalDataUtils.getPrefix(configuration));
 +        Page<Blob> items;
 +
 +        try {
 +            items = gcs.list(container, options);
 +        } catch (BaseServiceException ex) {
 +            throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
 +        }
 +
 +        // Collect the paths to files only
 +        collectAndFilterFiles(items, includeExcludeMatcher.getPredicate(), includeExcludeMatcher.getMatchersList(),
 +                filesOnly);
 +
 +        // Warn if no files are returned
 +        if (filesOnly.isEmpty() && warningCollector.shouldWarn()) {
 +            Warning warning = Warning.of(null, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES);
 +            warningCollector.warn(warning);
 +        }
 +
 +        return filesOnly;
 +    }
 +
 +    /**
 +     * Excludes paths ending with "/" as that's a directory indicator, we need to return the files only
 +     *
 +     * @param items List of returned objects
 +     */
 +    private static void collectAndFilterFiles(Page<Blob> items, BiPredicate<List<Matcher>, String> predicate,
 +            List<Matcher> matchers, List<Blob> filesOnly) {
 +        for (Blob item : items.iterateAll()) {
 +            // skip folders
 +            if (item.getName().endsWith("/")) {
 +                continue;
 +            }
 +
 +            // add the file only if the include/exclude predicate accepts its name
 +            if (predicate.test(matchers, item.getName())) {
 +                filesOnly.add(item);
 +            }
 +        }
 +    }
 +
 +    /**
 +     * Configures the Hadoop job configuration for GCS access using the provided configuration
 +     *
 +     * @param configuration      properties
 +     * @param numberOfPartitions number of partitions in the cluster
 +     */
 +    public static void configureHdfsJobConf(JobConf conf, Map<String, String> configuration, int numberOfPartitions) {
 +        String jsonCredentials = configuration.get(JSON_CREDENTIALS_FIELD_NAME);
 +        String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
 +
 +        // disable caching FileSystem
 +        HDFSUtils.disableHadoopFileSystemCache(conf, HADOOP_GCS_PROTOCOL);
 +
 +        // TODO(htowaileb): needs further testing, recommended to disable by gcs-hadoop team
 +        conf.set(GCSConstants.HADOOP_SUPPORT_COMPRESSED, ExternalDataConstants.FALSE);
 +
 +        // TODO(htowaileb): needs further testing
 +        // set number of threads
 +        //        conf.set(GCSConstants.HADOOP_MAX_REQUESTS_PER_BATCH, String.valueOf(numberOfPartitions));
 +        //        conf.set(GCSConstants.HADOOP_BATCH_THREADS, String.valueOf(numberOfPartitions));
 +
 +        // authentication method
 +        // TODO(htowaileb): find a way to pass the content instead of the path to keyfile, this line is temporary
 +        Path credentials = Path.of("credentials.json");
 +        if (jsonCredentials == null) {
 +            // anonymous access
 +            conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_UNAUTHENTICATED);
 +        } else {
 +            // TODO(htowaileb) need to pass the file content
 +            conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE);
 +            conf.set(HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH, credentials.toAbsolutePath().toString());
 +        }
 +
 +        // set endpoint if provided, default is https://storage.googleapis.com/
 +        if (endpoint != null) {
 +            conf.set(HADOOP_ENDPOINT, endpoint);
 +        }
 +    }
 +}
diff --cc asterixdb/asterix-server/pom.xml
index e068a2aa2d,0dcfbdec5e..6e279b836d
--- a/asterixdb/asterix-server/pom.xml
+++ b/asterixdb/asterix-server/pom.xml
@@@ -676,14 -518,7 +658,15 @@@
                  <aliasUrl>https://raw.githubusercontent.com/googleapis/gapic-generator-java/v2.13.0/java-common-protos/LICENSE</aliasUrl>
                  <aliasUrl>https://raw.githubusercontent.com/googleapis/google-api-java-client/v2.1.2/LICENSE</aliasUrl>
                  <aliasUrl>https://raw.githubusercontent.com/grpc/grpc-java/v1.52.1/LICENSE</aliasUrl>
+                 <aliasUrl>https://raw.githubusercontent.com/reactor/reactor-netty/v1.0.28/LICENSE</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/googleapis/java-core/v2.8.0/LICENSE</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/google/gson/gson-parent-2.9.0/LICENSE</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/allegro/json-avro-converter/json-avro-converter-0.2.15/LICENSE.md</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/airlift/aircompressor/0.21/license.txt</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/apache/orc/v1.8.0/LICENSE</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/RoaringBitmap/RoaringBitmap/0.9.39/LICENSE</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/JetBrains/java-annotations/master/LICENSE.txt</aliasUrl>
 +                <aliasUrl>https://raw.githubusercontent.com/awslabs/aws-crt-java/v0.21.10/LICENSE</aliasUrl>
                </aliasUrls>
                <metric>1</metric>
              </license>
diff --cc asterixdb/pom.xml
index 47544bbc8b,33ca4a80d9..681e9a8236
--- a/asterixdb/pom.xml
+++ b/asterixdb/pom.xml
@@@ -87,10 -87,12 +87,12 @@@
      <hadoop.version>3.3.4</hadoop.version>
      <jacoco.version>0.7.6.201602180812</jacoco.version>
      <log4j.version>2.19.0</log4j.version>
 -    <awsjavasdk.version>2.17.218</awsjavasdk.version>
 +    <awsjavasdk.version>2.20.37</awsjavasdk.version>
      <parquet.version>1.12.3</parquet.version>
      <hadoop-awsjavasdk.version>1.12.402</hadoop-awsjavasdk.version>
-     <azureblobjavasdk.version>12.14.2</azureblobjavasdk.version>
+     <azureblobjavasdk.version>12.22.0</azureblobjavasdk.version>
+     <azurecommonjavasdk.version>12.21.0</azurecommonjavasdk.version>
+     <azureidentity.version>1.9.0</azureidentity.version>
      <azuredatalakejavasdk.version>12.7.2</azuredatalakejavasdk.version>
      <gcsjavasdk.version>2.17.2</gcsjavasdk.version>
      <hadoop-azuresdk.version>8.6.6</hadoop-azuresdk.version>