You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2023/06/14 01:18:51 UTC
[asterixdb] 14/14: Merge branch 'gerrit/neo' into 'master'
This is an automated email from the ASF dual-hosted git repository.
mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 7952d127854a2ef4750df897f39a957257f88a9f
Merge: 9f38f8df67 fc90675063
Author: Michael Blow <mi...@couchbase.com>
AuthorDate: Tue Jun 13 19:11:15 2023 -0400
Merge branch 'gerrit/neo' into 'master'
Change-Id: I76febe15393ba442f007d36655cd68e2a6a75238
.../non-pure-function.00.ddl.sqlpp | 30 ++++
.../non-pure-function.01.update.sqlpp | 24 +++
.../non-pure-function.02.query.sqlpp | 26 +++
.../non-pure-function.03.query.sqlpp | 27 ++++
.../non-pure-function.04.query.sqlpp | 28 ++++
.../non-pure-function.05.query.sqlpp | 27 ++++
.../non-pure-function.06.query.sqlpp | 28 ++++
.../non-pure-function/non-pure-function.02.adm | 1 +
.../non-pure-function/non-pure-function.03.adm | 1 +
.../non-pure-function/non-pure-function.04.adm | 1 +
.../non-pure-function/non-pure-function.05.adm | 1 +
.../non-pure-function/non-pure-function.06.adm | 1 +
.../test/resources/runtimets/testsuite_sqlpp.xml | 5 +
asterixdb/asterix-external-data/pom.xml | 10 ++
.../asterix/external/util/google/gcs/GCSUtils.java | 8 +-
asterixdb/asterix-server/pom.xml | 87 ++++------
asterixdb/pom.xml | 22 ++-
.../appended-resources/supplemental-models.xml | 180 ++++++++++++---------
...bc8e88873f0c1e42723640536866d3df_COPYRIGHT.txt} | 0
...bc7348d6be23d2a9daaacd6b8424b8c1_COPYRIGHT.txt} | 4 +-
...7db1c3a9d42f1ae39fb35cfbb54a8742_COPYRIGHT.txt} | 4 +-
...5b7e93858a39ad4a46ba4d0e17a0aefe_COPYRIGHT.txt} | 4 +-
...-sdk-for-java_azure-identity_1.9.0_LICENSE.txt} | 0
...e-sdk-for-java_azure-identity_1.9.0_NOTICE.txt} | 141 +++++++++++++---
...or-java_azure-storage-blob_12.22.0_LICENSE.txt} | 0
...for-java_azure-storage-blob_12.22.0_NOTICE.txt} | 45 ++++--
.../rules/InlineAssignIntoAggregateRule.java | 31 ++--
.../hyracks/control/common/config/OptionTypes.java | 4 +
hyracks-fullstack/hyracks/hyracks-hdfs/pom.xml | 8 +-
hyracks-fullstack/pom.xml | 15 ++
30 files changed, 571 insertions(+), 192 deletions(-)
diff --cc asterixdb/asterix-external-data/pom.xml
index 466bbb11f5,0698b8a20f..253754efb0
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@@ -550,16 -551,11 +555,21 @@@
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util-ajax</artifactId>
</dependency>
+ <!-- Manually included to avoid CVE-2023-1370 -->
+ <dependency>
+ <groupId>net.minidev</groupId>
+ <artifactId>json-smart</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.iceberg</groupId>
+ <artifactId>iceberg-core</artifactId>
+ <version>1.1.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>1.11.1</version>
+ </dependency>
</dependencies>
<!-- apply patch for HADOOP-17225 to workaround CVE-2019-10172 -->
<repositories>
diff --cc asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
index 3efb041dae,0000000000..6183a88143
mode 100644,000000..100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/google/gcs/GCSUtils.java
@@@ -1,242 -1,0 +1,242 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.util.google.gcs;
+
+import static org.apache.asterix.common.exceptions.ErrorCode.EXTERNAL_SOURCE_ERROR;
+import static org.apache.asterix.common.exceptions.ErrorCode.INVALID_PARAM_VALUE_ALLOWED_VALUE;
+import static org.apache.asterix.common.exceptions.ErrorCode.PARAM_NOT_ALLOWED_IF_PARAM_IS_PRESENT;
+import static org.apache.asterix.external.util.ExternalDataUtils.getPrefix;
+import static org.apache.asterix.external.util.ExternalDataUtils.validateIncludeExclude;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.ENDPOINT_FIELD_NAME;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_TYPE;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_AUTH_UNAUTHENTICATED;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_ENDPOINT;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.HADOOP_GCS_PROTOCOL;
+import static org.apache.asterix.external.util.google.gcs.GCSConstants.JSON_CREDENTIALS_FIELD_NAME;
+import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiPredicate;
+import java.util.regex.Matcher;
+
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.SourceLocation;
+import org.apache.hyracks.api.exceptions.Warning;
+
+import com.google.api.gax.paging.Page;
+import com.google.auth.oauth2.GoogleCredentials;
+import com.google.cloud.BaseServiceException;
++import com.google.cloud.NoCredentials;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+
+public class GCSUtils {
+ private GCSUtils() {
+ throw new AssertionError("do not instantiate");
+
+ }
+
+ /**
+ * Builds the client using the provided configuration
+ *
+ * @param configuration properties
+ * @return clientasterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+ * @throws CompilationException CompilationException
+ */
+ public static Storage buildClient(Map<String, String> configuration) throws CompilationException {
+ String applicationDefaultCredentials = configuration.get(APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME);
+ String jsonCredentials = configuration.get(JSON_CREDENTIALS_FIELD_NAME);
+ String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
+
+ StorageOptions.Builder builder = StorageOptions.newBuilder();
+
+ // default credentials provider
+ if (applicationDefaultCredentials != null) {
+ // only "true" value is allowed
+ if (!applicationDefaultCredentials.equalsIgnoreCase("true")) {
+ throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE,
+ APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME, "true");
+ }
+
+ // no other authentication parameters are allowed
+ if (jsonCredentials != null) {
+ throw new CompilationException(PARAM_NOT_ALLOWED_IF_PARAM_IS_PRESENT, JSON_CREDENTIALS_FIELD_NAME,
+ APPLICATION_DEFAULT_CREDENTIALS_FIELD_NAME);
+ }
+
+ try {
+ builder.setCredentials(GoogleCredentials.getApplicationDefault());
+ } catch (IOException ex) {
+ throw CompilationException.create(EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
+ }
- }
-
- // json credentials
- if (jsonCredentials != null) {
++ } else if (jsonCredentials != null) {
+ try (InputStream credentialsStream = new ByteArrayInputStream(jsonCredentials.getBytes())) {
+ builder.setCredentials(GoogleCredentials.fromStream(credentialsStream));
+ } catch (IOException ex) {
+ throw new CompilationException(EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
+ }
++ } else {
++ builder.setCredentials(NoCredentials.getInstance());
+ }
+
+ if (endpoint != null) {
+ builder.setHost(endpoint);
+ }
+
+ return builder.build().getService();
+ }
+
+ /**
+ * Validate external dataset properties
+ *
+ * @param configuration properties
+ * @throws CompilationException Compilation exception
+ */
+ public static void validateProperties(Map<String, String> configuration, SourceLocation srcLoc,
+ IWarningCollector collector) throws CompilationException {
+
+ // check if the format property is present
+ if (configuration.get(ExternalDataConstants.KEY_FORMAT) == null) {
+ throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED, srcLoc, ExternalDataConstants.KEY_FORMAT);
+ }
+
+ validateIncludeExclude(configuration);
+ String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
+
+ try {
+ Storage.BlobListOption limitOption = Storage.BlobListOption.pageSize(1);
+ Storage.BlobListOption prefixOption = Storage.BlobListOption.prefix(getPrefix(configuration));
+ Storage storage = buildClient(configuration);
+ Page<Blob> items = storage.list(container, limitOption, prefixOption);
+
+ if (!items.iterateAll().iterator().hasNext() && collector.shouldWarn()) {
+ Warning warning = Warning.of(srcLoc, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES);
+ collector.warn(warning);
+ }
+ } catch (CompilationException ex) {
+ throw ex;
+ } catch (Exception ex) {
+ throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
+ }
+ }
+
+ public static List<Blob> listItems(Map<String, String> configuration, IncludeExcludeMatcher includeExcludeMatcher,
+ IWarningCollector warningCollector) throws CompilationException {
+ // Prepare to retrieve the objects
+ List<Blob> filesOnly = new ArrayList<>();
+ String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
+ Storage gcs = buildClient(configuration);
+ Storage.BlobListOption options = Storage.BlobListOption.prefix(ExternalDataUtils.getPrefix(configuration));
+ Page<Blob> items;
+
+ try {
+ items = gcs.list(container, options);
+ } catch (BaseServiceException ex) {
+ throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
+ }
+
+ // Collect the paths to files only
+ collectAndFilterFiles(items, includeExcludeMatcher.getPredicate(), includeExcludeMatcher.getMatchersList(),
+ filesOnly);
+
+ // Warn if no files are returned
+ if (filesOnly.isEmpty() && warningCollector.shouldWarn()) {
+ Warning warning = Warning.of(null, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES);
+ warningCollector.warn(warning);
+ }
+
+ return filesOnly;
+ }
+
+ /**
+ * Excludes paths ending with "/" as that's a directory indicator, we need to return the files only
+ *
+ * @param items List of returned objects
+ */
+ private static void collectAndFilterFiles(Page<Blob> items, BiPredicate<List<Matcher>, String> predicate,
+ List<Matcher> matchers, List<Blob> filesOnly) {
+ for (Blob item : items.iterateAll()) {
+ // skip folders
+ if (item.getName().endsWith("/")) {
+ continue;
+ }
+
+ // No filter, add file
+ if (predicate.test(matchers, item.getName())) {
+ filesOnly.add(item);
+ }
+ }
+ }
+
+ /**
+ * Builds the client using the provided configuration
+ *
+ * @param configuration properties
+ * @param numberOfPartitions number of partitions in the cluster
+ */
+ public static void configureHdfsJobConf(JobConf conf, Map<String, String> configuration, int numberOfPartitions) {
+ String jsonCredentials = configuration.get(JSON_CREDENTIALS_FIELD_NAME);
+ String endpoint = configuration.get(ENDPOINT_FIELD_NAME);
+
+ // disable caching FileSystem
+ HDFSUtils.disableHadoopFileSystemCache(conf, HADOOP_GCS_PROTOCOL);
+
+ // TODO(htowaileb): needs further testing, recommended to disable by gcs-hadoop team
+ conf.set(GCSConstants.HADOOP_SUPPORT_COMPRESSED, ExternalDataConstants.FALSE);
+
+ // TODO(htowaileb): needs further testing
+ // set number of threads
+ // conf.set(GCSConstants.HADOOP_MAX_REQUESTS_PER_BATCH, String.valueOf(numberOfPartitions));
+ // conf.set(GCSConstants.HADOOP_BATCH_THREADS, String.valueOf(numberOfPartitions));
+
+ // authentication method
+ // TODO(htowaileb): find a way to pass the content instead of the path to keyfile, this line is temporary
+ Path credentials = Path.of("credentials.json");
+ if (jsonCredentials == null) {
+ // anonymous access
+ conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_UNAUTHENTICATED);
+ } else {
+ // TODO(htowaileb) need to pass the file content
+ conf.set(HADOOP_AUTH_TYPE, HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE);
+ conf.set(HADOOP_AUTH_SERVICE_ACCOUNT_JSON_KEY_FILE_PATH, credentials.toAbsolutePath().toString());
+ }
+
+ // set endpoint if provided, default is https://storage.googleapis.com/
+ if (endpoint != null) {
+ conf.set(HADOOP_ENDPOINT, endpoint);
+ }
+ }
+}
diff --cc asterixdb/asterix-server/pom.xml
index e068a2aa2d,0dcfbdec5e..6e279b836d
--- a/asterixdb/asterix-server/pom.xml
+++ b/asterixdb/asterix-server/pom.xml
@@@ -676,14 -518,7 +658,15 @@@
<aliasUrl>https://raw.githubusercontent.com/googleapis/gapic-generator-java/v2.13.0/java-common-protos/LICENSE</aliasUrl>
<aliasUrl>https://raw.githubusercontent.com/googleapis/google-api-java-client/v2.1.2/LICENSE</aliasUrl>
<aliasUrl>https://raw.githubusercontent.com/grpc/grpc-java/v1.52.1/LICENSE</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/reactor/reactor-netty/v1.0.28/LICENSE</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/googleapis/java-core/v2.8.0/LICENSE</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/google/gson/gson-parent-2.9.0/LICENSE</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/allegro/json-avro-converter/json-avro-converter-0.2.15/LICENSE.md</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/airlift/aircompressor/0.21/license.txt</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/apache/orc/v1.8.0/LICENSE</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/RoaringBitmap/RoaringBitmap/0.9.39/LICENSE</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/JetBrains/java-annotations/master/LICENSE.txt</aliasUrl>
+ <aliasUrl>https://raw.githubusercontent.com/awslabs/aws-crt-java/v0.21.10/LICENSE</aliasUrl>
</aliasUrls>
<metric>1</metric>
</license>
diff --cc asterixdb/pom.xml
index 47544bbc8b,33ca4a80d9..681e9a8236
--- a/asterixdb/pom.xml
+++ b/asterixdb/pom.xml
@@@ -87,10 -87,12 +87,12 @@@
<hadoop.version>3.3.4</hadoop.version>
<jacoco.version>0.7.6.201602180812</jacoco.version>
<log4j.version>2.19.0</log4j.version>
- <awsjavasdk.version>2.17.218</awsjavasdk.version>
+ <awsjavasdk.version>2.20.37</awsjavasdk.version>
<parquet.version>1.12.3</parquet.version>
<hadoop-awsjavasdk.version>1.12.402</hadoop-awsjavasdk.version>
- <azureblobjavasdk.version>12.14.2</azureblobjavasdk.version>
+ <azureblobjavasdk.version>12.22.0</azureblobjavasdk.version>
+ <azurecommonjavasdk.version>12.21.0</azurecommonjavasdk.version>
+ <azureidentity.version>1.9.0</azureidentity.version>
<azuredatalakejavasdk.version>12.7.2</azuredatalakejavasdk.version>
<gcsjavasdk.version>2.17.2</gcsjavasdk.version>
<hadoop-azuresdk.version>8.6.6</hadoop-azuresdk.version>