Posted to issues@solr.apache.org by GitBox <gi...@apache.org> on 2021/08/09 19:07:21 UTC

[GitHub] [solr] HoustonPutman commented on a change in pull request #120: SOLR-15089: Allow backup/restoration to Amazon's S3 blobstore

HoustonPutman commented on a change in pull request #120:
URL: https://github.com/apache/solr/pull/120#discussion_r685450117



##########
File path: solr/contrib/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java
##########
@@ -0,0 +1,568 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.s3;
+
+import com.amazonaws.AmazonClientException;
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.Protocol;
+import com.amazonaws.client.builder.AwsClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.DeleteObjectsRequest;
+import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion;
+import com.amazonaws.services.s3.model.DeleteObjectsResult;
+import com.amazonaws.services.s3.model.ListObjectsRequest;
+import com.amazonaws.services.s3.model.ObjectListing;
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.input.ClosedInputStream;
+import org.apache.solr.common.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+
+/**
+ * Creates an {@link AmazonS3} client for communicating with AWS S3. Uses the default credential provider chain;
+ * see the <a href="https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html">AWS SDK docs</a> for
+ * details on where this client fetches credentials from and the order of precedence.
+ */
+class S3StorageClient {
+
+    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    static final String BLOB_FILE_PATH_DELIMITER = "/";
+
+    // S3 has a hard limit of 1000 keys per batch delete request
+    private static final int MAX_KEYS_PER_BATCH_DELETE = 1000;
+
+    // Content type used to identify directory entries in S3
+    private static final String BLOB_DIR_CONTENT_TYPE = "application/x-directory";
+
+    // Error codes returned by S3 when a key is not found.
+    private static final Set<String> NOT_FOUND_CODES = Set.of("NoSuchKey", "404 Not Found");
+
+    private final AmazonS3 s3Client;
+
+    /**
+     * The S3 bucket to which all blobs are written.
+     */
+    private final String bucketName;
+
+    S3StorageClient(String bucketName, String region, String proxyHost, int proxyPort, String endpoint) {
+        this(createInternalClient(region, proxyHost, proxyPort, endpoint), bucketName);
+    }
+
+    @VisibleForTesting
+    S3StorageClient(AmazonS3 s3Client, String bucketName) {
+        this.s3Client = s3Client;
+        this.bucketName = bucketName;
+    }
+
+    private static AmazonS3 createInternalClient(String region, String proxyHost, int proxyPort, String endpoint) {
+        ClientConfiguration clientConfig = new ClientConfiguration()
+            .withProtocol(Protocol.HTTPS);
+
+        // If configured, add proxy
+        if (!StringUtils.isEmpty(proxyHost)) {
+            clientConfig.setProxyHost(proxyHost);
+            if (proxyPort > 0) {
+                clientConfig.setProxyPort(proxyPort);
+            }
+        }
+
+        /*
+         * The default S3 client builder loads credentials from the default provider chain and handles token refreshes
+         */
+        AmazonS3ClientBuilder clientBuilder = AmazonS3ClientBuilder.standard()
+            .enablePathStyleAccess()
+            .withClientConfiguration(clientConfig);
+
+        if (!StringUtils.isEmpty(endpoint)) {
+            clientBuilder.setEndpointConfiguration(
+                new AwsClientBuilder.EndpointConfiguration(endpoint, region)
+            );
+        } else {
+            clientBuilder.setRegion(region);
+        }
+
+        return clientBuilder.build();
+    }
+
+    /**
+     * Create Directory in S3 Blob Store.
+     *
+     * @param path Directory Path in Blob Store.
+     */
+    void createDirectory(String path) throws S3Exception {
+        path = sanitizedDirPath(path);
+
+        if (!parentDirectoryExist(path)) {
+            createDirectory(getParentDirectory(path));
+            //TODO see https://issues.apache.org/jira/browse/SOLR-15359
+//            throw new BlobException("Parent directory doesn't exist, path=" + path);
+        }
+
+        ObjectMetadata objectMetadata = new ObjectMetadata();
+        objectMetadata.setContentType(BLOB_DIR_CONTENT_TYPE);
+        objectMetadata.setContentLength(0);
+
+        // Create empty blob object with header
+        final InputStream im = ClosedInputStream.CLOSED_INPUT_STREAM;
+
+        try {
+            PutObjectRequest putRequest = new PutObjectRequest(bucketName, path, im, objectMetadata);
+            s3Client.putObject(putRequest);
+        } catch (AmazonClientException ase) {
+            throw handleAmazonException(ase);
+        }
+    }
+
+    /**
+     * Delete files from S3 Blob Store. Deletion order is not guaranteed.
+     *
+     * @param paths Paths to files or blobs.
+     */
+    void delete(Collection<String> paths) throws S3Exception {
+        Set<String> entries = new HashSet<>();
+        for (String path : paths) {
+            entries.add(sanitizedFilePath(path));
+        }
+
+        deleteBlobs(entries);
+    }
+
+    /**
+     * Delete directory, all the files and sub-directories from S3.
+     *
+     * @param path Path to directory in S3.
+     */
+    void deleteDirectory(String path) throws S3Exception {
+        path = sanitizedDirPath(path);
+
+        Set<String> entries = new HashSet<>();
+        if (pathExists(path)) {
+            entries.add(path);
+        }
+
+        // Get all the files and subdirectories
+        entries.addAll(listAll(path));
+
+        deleteObjects(entries);
+    }
+
+    /**
+     * List all the files and sub-directories directly under given path.
+     *
+     * @param path Path to directory in S3.
+     * @return Files and sub-directories in path.
+     */
+    String[] listDir(String path) throws S3Exception {
+        path = sanitizedDirPath(path);
+
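+        // Copy the sanitized path into an effectively-final local so the lambdas below can capture it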
+        String prefix = path;
+        ListObjectsRequest listRequest = new ListObjectsRequest()
+            .withBucketName(bucketName)
+            .withPrefix(prefix)
+            .withDelimiter(BLOB_FILE_PATH_DELIMITER);
+
+        List<String> entries = new ArrayList<>();
+        try {
+            ObjectListing objectListing = s3Client.listObjects(listRequest);
+
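+            // Page through the listing, since S3 returns results in batches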
+            while (true) {
+                List<String> files = objectListing.getObjectSummaries().stream()
+                        .map(S3ObjectSummary::getKey)
+                        .collect(Collectors.toList());
+                files.addAll(objectListing.getCommonPrefixes());
+                // This filtering is needed only for S3Mock, which ignores the trailing '/' in the prefix; real S3 does not.
+                files = files.stream()
+                    .filter(s -> s.startsWith(prefix))
+                    .map(s -> s.substring(prefix.length()))
+                    .filter(s -> !s.isEmpty())
+                    .filter(s -> {
+                        int slashIndex = s.indexOf(BLOB_FILE_PATH_DELIMITER);
+                        return slashIndex == -1 || slashIndex == s.length() - 1;
+                    })
+                    .map(s -> {

Review comment:
       I do this because `AbstractBackupRepositoryTest.testCanListFullOrEmptyDirectories()` expects directories not to end with a trailing slash.
   
    I originally changed the test to expect trailing slashes, but that change had such a large footprint across the rest of the backup-restore codebase that it was easier to just strip the trailing slash here.
   
    I'll run some tests, though, to see why this is breaking the S3Mock use case.
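
    Here's a rough sketch of the normalization I'm describing (illustrative only, not the exact code in this patch; it assumes the class's `BLOB_FILE_PATH_DELIMITER` constant):

    ```java
    // Sketch: given an entry already relativized against the prefix
    // (e.g. "file1" or "subdir/"), strip the trailing delimiter from
    // directory entries so listDir() returns bare names, which is what
    // testCanListFullOrEmptyDirectories() expects.
    private static String stripTrailingDelimiter(String entry) {
        return entry.endsWith(BLOB_FILE_PATH_DELIMITER)
            ? entry.substring(0, entry.length() - 1)
            : entry;
    }
    ```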




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


