You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2020/07/23 07:53:24 UTC
[james-project] 13/18: JAMES-3313 Write a Deduplicating blobStore
This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
commit ffab77e50c5a3491533e1a6c710b4de6c83da598
Author: Benoit Tellier <bt...@linagora.com>
AuthorDate: Fri Jul 17 16:21:31 2020 +0700
JAMES-3313 Write a Deduplicating blobStore
---
server/blob/blob-deduplicating/pom.xml | 4 +
.../deduplication/DeDuplicationBlobStore.scala | 100 +++++++++++++++++++++
2 files changed, 104 insertions(+)
diff --git a/server/blob/blob-deduplicating/pom.xml b/server/blob/blob-deduplicating/pom.xml
index 7f531e9..ebc0006 100644
--- a/server/blob/blob-deduplicating/pom.xml
+++ b/server/blob/blob-deduplicating/pom.xml
@@ -71,6 +71,10 @@
<artifactId>play-json_${scala.base}</artifactId>
</dependency>
<dependency>
+ <groupId>io.projectreactor</groupId>
+ <artifactId>reactor-scala-extensions_${scala.base}</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
diff --git a/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala b/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
new file mode 100644
index 0000000..5bd663f
--- /dev/null
+++ b/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
@@ -0,0 +1,100 @@
+/*****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ *****************************************************************/
+
+package org.apache.james.server.blob.deduplication
+
+import java.io.InputStream
+
+import com.google.common.base.Preconditions
+import com.google.common.hash.{Hashing, HashingInputStream}
+import com.google.common.io.{ByteSource, FileBackedOutputStream}
+import javax.inject.{Inject, Named}
+import org.apache.commons.io.IOUtils
+import org.apache.james.blob.api.{BlobId, BlobStore, BucketName, DumbBlobStore}
+import org.reactivestreams.Publisher
+import reactor.core.publisher.Mono
+import reactor.core.scala.publisher.SMono
+import reactor.util.function.{Tuple2, Tuples}
+
+// Companion constants for DeDuplicationBlobStore.
+object DeDuplicationBlobStore {
+ // Fallback bucket name. NOTE(review): appears unused within this commit — confirm usage in follow-up commits.
+ val DEFAULT_BUCKET = "defaultBucket"
+ // Fourth argument to Mono.using in save(InputStream): false requests eager resource cleanup.
+ val LAZY_RESOURCE_CLEANUP = false
+ // Byte threshold for FileBackedOutputStream before it spills from memory to a temp file.
+ val FILE_THRESHOLD = 10000
+}
+
+// BlobStore decorator that de-duplicates stored content. For the InputStream path the
+// blob id is built from a SHA-256 digest of the payload, so saving identical content
+// twice yields the same BlobId (and hence a single blob) in the underlying DumbBlobStore.
+// The Array[Byte] path relies on blobIdFactory.forPayload — presumably also
+// content-derived; confirm in BlobId.Factory.
+class DeDuplicationBlobStore @Inject()(dumbBlobStore: DumbBlobStore,
+ @Named("defaultBucket") defaultBucketName: BucketName,
+ blobIdFactory: BlobId.Factory) extends BlobStore {
+
+ // Saves an in-memory payload and emits its id once the underlying save completes.
+ // NOTE(review): storagePolicy is accepted but never used by this implementation — confirm intent.
+ override def save(bucketName: BucketName, data: Array[Byte], storagePolicy: BlobStore.StoragePolicy): Publisher[BlobId] = {
+ Preconditions.checkNotNull(bucketName)
+ Preconditions.checkNotNull(data)
+
+ val blobId = blobIdFactory.forPayload(data)
+
+ // Persist first; `then` discards the save signal and emits the id afterwards.
+ SMono(dumbBlobStore.save(bucketName, blobId, data))
+ .`then`(SMono.just(blobId))
+ }
+
+ // Saves a streamed payload. The stream is hashed (SHA-256) while being buffered into a
+ // FileBackedOutputStream (in memory up to FILE_THRESHOLD bytes, then a temp file),
+ // because the content-derived id is only known after the whole stream is consumed.
+ override def save(bucketName: BucketName, data: InputStream, storagePolicy: BlobStore.StoragePolicy): Publisher[BlobId] = {
+ Preconditions.checkNotNull(bucketName)
+ Preconditions.checkNotNull(data)
+ val hashingInputStream = new HashingInputStream(Hashing.sha256, data)
+ val sourceSupplier: FileBackedOutputStream => SMono[BlobId] = (fileBackedOutputStream: FileBackedOutputStream) => saveAndGenerateBlobId(bucketName, hashingInputStream, fileBackedOutputStream)
+ // Mono.using ties the buffer's lifetime to the subscription; reset() releases the
+ // buffer (deleting any backing temp file). LAZY_RESOURCE_CLEANUP = false => eager cleanup.
+ Mono.using(() => new FileBackedOutputStream(DeDuplicationBlobStore.FILE_THRESHOLD),
+ sourceSupplier,
+ (fileBackedOutputStream: FileBackedOutputStream) => fileBackedOutputStream.reset(),
+ DeDuplicationBlobStore.LAZY_RESOURCE_CLEANUP)
+ }
+
+ // Drains the stream into the buffer (computing the hash as a side effect of reading),
+ // derives the id from the digest, then saves the buffered bytes under that id.
+ private def saveAndGenerateBlobId(bucketName: BucketName, hashingInputStream: HashingInputStream, fileBackedOutputStream: FileBackedOutputStream): SMono[BlobId] =
+ SMono.fromCallable(() => {
+ // Blocking copy deferred into fromCallable so it runs on subscription.
+ IOUtils.copy(hashingInputStream, fileBackedOutputStream)
+ // hash is only meaningful here, after the copy has consumed the whole stream.
+ Tuples.of(blobIdFactory.from(hashingInputStream.hash.toString), fileBackedOutputStream.asByteSource)
+ })
+ .flatMap((tuple: Tuple2[BlobId, ByteSource]) =>
+ SMono(dumbBlobStore.save(bucketName, tuple.getT1, tuple.getT2))
+ .`then`(SMono.just(tuple.getT1)))
+
+
+ // Plain delegation to the underlying store.
+ // NOTE(review): blobId is not null-checked here, unlike delete() below — confirm if intentional.
+ override def readBytes(bucketName: BucketName, blobId: BlobId): Publisher[Array[Byte]] = {
+ Preconditions.checkNotNull(bucketName)
+
+ dumbBlobStore.readBytes(bucketName, blobId)
+ }
+
+ // Blocking read delegated to the underlying store.
+ // NOTE(review): blobId is not null-checked here either — same inconsistency as readBytes.
+ override def read(bucketName: BucketName, blobId: BlobId): InputStream = {
+ Preconditions.checkNotNull(bucketName)
+
+ dumbBlobStore.read(bucketName, blobId)
+ }
+
+ // Returns the injected @Named("defaultBucket") bucket.
+ override def getDefaultBucketName: BucketName = defaultBucketName
+
+ // Bucket deletion is delegated unchanged.
+ override def deleteBucket(bucketName: BucketName): Publisher[Void] = {
+ dumbBlobStore.deleteBucket(bucketName)
+ }
+
+ // Deletes a single blob.
+ // NOTE(review): under de-duplication a blob may be shared by several logical writers;
+ // deleting it removes it for all of them — confirm the intended ownership/GC model.
+ override def delete(bucketName: BucketName, blobId: BlobId): Publisher[Void] = {
+ Preconditions.checkNotNull(bucketName)
+ Preconditions.checkNotNull(blobId)
+
+ dumbBlobStore.delete(bucketName, blobId)
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org