You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by ro...@apache.org on 2020/06/11 15:25:38 UTC
[james-project] 01/17: JAMES-3150 Add ScalaCheck for the garbage
collector
This is an automated email from the ASF dual-hosted git repository.
rouazana pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 0769feee3aa2c89ca70bf0ccfccf5bb03d1409be
Author: Matthieu Baechler <ma...@apache.org>
AuthorDate: Thu Feb 27 11:24:20 2020 +0100
JAMES-3150 Add ScalaCheck for the garbage collector
---
gc-properties.adoc | 23 +++++
server/blob/blob-deduplicating/pom.xml | 106 ++++++++++++++++++++
.../src/test/scala/GCPropertiesTest.scala | 107 +++++++++++++++++++++
server/blob/pom.xml | 1 +
4 files changed, 237 insertions(+)
diff --git a/gc-properties.adoc b/gc-properties.adoc
new file mode 100644
index 0000000..7c69c01
--- /dev/null
+++ b/gc-properties.adoc
@@ -0,0 +1,23 @@
+= GC properties
+
+1. the execution time of the GC should depend on the
+active dataset, not on the global dataset
+(for scalability purposes)
+
+2. GC should run on live dataset
+
+ 2.1. GC should not delete data being referenced by a pending process or
+still referenced
+
+ 2.2. GC should be idempotent: 2 concurrent or sequential runs should
+not have a different outcome than a single one
+
+3. GC should remove data from the underlying store
+
+ 3.1. an unreferenced piece of data should be removed after 1 day
+
+ 3.2. less than 10% of unreferenced data of a significant dataset
+should persist after three GC executions
+
+4. GC should report what it does
+
diff --git a/server/blob/blob-deduplicating/pom.xml b/server/blob/blob-deduplicating/pom.xml
new file mode 100644
index 0000000..e849535
--- /dev/null
+++ b/server/blob/blob-deduplicating/pom.xml
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>james-server-blob</artifactId>
+ <groupId>org.apache.james</groupId>
+ <version>3.5.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>blob-deduplicating</artifactId>
+ <packaging>jar</packaging>
+
+ <name>Apache James :: Server :: Blob :: Deduplicating Blob Storage</name>
+ <description>
+ An implementation of BlobStore which deduplicates the stored blobs and uses a garbage collector
+ to ensure their effective deletion.
+ </description>
+
+ <dependencies>
+ <dependency>
+ <groupId>${james.groupId}</groupId>
+ <artifactId>blob-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>${james.groupId}</groupId>
+ <artifactId>blob-api</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${james.groupId}</groupId>
+ <artifactId>blob-memory</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${james.groupId}</groupId>
+ <artifactId>james-server-testing</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${james.groupId}</groupId>
+ <artifactId>james-server-util</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>${james.groupId}</groupId>
+ <artifactId>testing-base</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.scala-lang.modules</groupId>
+ <artifactId>scala-java8-compat_${scala.base}</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.scalactic</groupId>
+ <artifactId>scalactic_2.13</artifactId>
+ <version>3.1.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_2.13</artifactId>
+ <version>3.1.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.scalacheck</groupId>
+ <artifactId>scalacheck_2.13</artifactId>
+ <version>1.14.3</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/server/blob/blob-deduplicating/src/test/scala/GCPropertiesTest.scala b/server/blob/blob-deduplicating/src/test/scala/GCPropertiesTest.scala
new file mode 100644
index 0000000..5de3f44
--- /dev/null
+++ b/server/blob/blob-deduplicating/src/test/scala/GCPropertiesTest.scala
@@ -0,0 +1,107 @@
+import org.apache.james.blob.api.{BlobId, TestBlobId}
+import org.scalacheck.Gen
+import org.scalatest.funsuite.AnyFunSuite
+
+/** Numeric generation tag carried by every [[Reference]] and [[Deletion]] event. */
+final case class Generation(id: Long)
+/** Numeric identifier of an iteration, as carried by [[Report]]. NOTE(review): presumably one GC run - confirm. */
+final case class Iteration(id: Long)
+/** String identifier stored in [[Reference.externalId]]. NOTE(review): presumably identifies the holder of the reference - confirm. */
+final case class ExternalID(id: String) // TODO
+
+/** An entry of a generated history: either a [[Reference]] or a [[Deletion]]. */
+sealed trait Event
+/** A blob identified by `blobId` referenced under `externalId`, tagged with a generation. */
+final case class Reference(externalId: ExternalID, blobId: BlobId, generation: Generation) extends Event
+/** The deletion of a previously emitted `reference`, tagged with the generation in which the deletion happens. */
+final case class Deletion(generation: Generation, reference: Reference) extends Event
+
+/**
+ * Value pairing an [[Iteration]] with a set of blobs.
+ *
+ * NOTE(review): nothing in this file builds or consumes a Report yet; the field
+ * descriptions below are inferred from the names only - confirm once the GC is wired in.
+ *
+ * @param iteration     the iteration the report belongs to
+ * @param blobsToDelete blob ids, each paired with a generation
+ */
+final case class Report(iteration: Iteration, blobsToDelete: Set[(Generation, BlobId)])
+
+/**
+ * ScalaCheck generators producing random histories of [[Event]]s
+ * (references and deletions spread over non-decreasing generations).
+ */
+object Generators {
+
+  /** Long values in the inclusive range 0 to 100. */
+  val smallInteger: Gen[Long] = Gen.choose(0L, 100L)
+
+  // NOTE(review): `current` is neither read nor written by any generator in this object;
+  // it is kept only so the object's public members stay unchanged. Candidate for removal.
+  var current = 0
+
+  /**
+   * Infinite, non-decreasing sequence of generations starting at `Generation(0)`.
+   *
+   * Each step adds 0 (weight 90), 1 (weight 9) or 2 (weight 1) to the previous id,
+   * so consecutive events mostly share a generation.
+   */
+  val generationsGen: Gen[LazyList[Generation]] =
+    Gen.infiniteLazyList(Gen.frequency((90, Gen.const(0)), (9, Gen.const(1)), (1, Gen.const(2))))
+      .map(increments => increments.scanLeft(0)(_ + _))
+      .map(ids => ids.map(id => Generation(id.toLong)))
+
+  /** Iterations whose id is drawn from [[smallInteger]]. */
+  val iterationGen: Gen[Iteration] = smallInteger.map(Iteration.apply)
+
+  val blobIdFactory = new TestBlobId.Factory
+
+  /**
+   * Blob ids built from the generation and a random UUID.
+   *
+   * NOTE(review): `${generation}` interpolates the case class itself, so ids look like
+   * `Generation(0)_<uuid>` rather than `0_<uuid>` - confirm this is intended.
+   */
+  def blobIdGen(generation: Generation) : Gen[BlobId] = Gen.uuid.map(uuid =>
+    blobIdFactory.from(s"${generation}_$uuid"))
+
+  /** External ids backed by a random UUID string. */
+  val externalIDGen: Gen[ExternalID] = Gen.uuid.map(uuid => ExternalID(uuid.toString))
+
+  /** A brand-new reference (fresh blob id and fresh external id) in `generation`. */
+  def referenceGen(generation: Generation): Gen[Reference] = for {
+    blobId <- blobIdGen(generation)
+    externalId <- externalIDGen
+  } yield Reference(externalId, blobId, generation)
+
+  /**
+   * Computes the references that have not been deleted.
+   *
+   * The events are folded from first to last, and a reference is dropped only if its
+   * deletion was met earlier in the fold. The history must therefore be ordered
+   * newest-first (which is how `eventsGen` accumulates it); on an oldest-first history
+   * every reference would be reported as still existing.
+   */
+  def existingReferences : Seq[Event] => Set[Reference] = events => {
+    val (_, alive) = events.foldLeft((Set[Reference](), Set[Reference]())) {
+      case ((deleted, kept), Deletion(_, reference)) => (deleted + reference, kept)
+      case ((deleted, kept), reference: Reference) =>
+        if (deleted.contains(reference)) (deleted, kept) else (deleted, kept + reference)
+    }
+    alive
+  }
+
+  /**
+   * Deletion of one randomly chosen still-existing reference, tagged with `generation`.
+   *
+   * @param previousEvents history so far, newest-first (see [[existingReferences]])
+   * @return a generator of `None` when no reference is left to delete
+   */
+  def deletionGen(previousEvents : Seq[Event], generation: Generation): Gen[Option[Deletion]] = {
+    val persistingReferences = existingReferences(previousEvents)
+    if (persistingReferences.isEmpty) {
+      Gen.const(None)
+    } else {
+      Gen.oneOf(persistingReferences)
+        .map(reference => Deletion(generation, reference))
+        .map(Some(_))
+    }
+  }
+
+  /**
+   * Another reference derived from `reference`.
+   *
+   * When `reference` belongs to `generation`, the result keeps its blob id and generation
+   * and only gets a fresh external id. Otherwise an entirely new reference is generated
+   * in `generation`.
+   */
+  def duplicateReferenceGen(generation: Generation, reference: Reference): Gen[Reference] = {
+    if (reference.generation == generation) {
+      externalIDGen.map(id => reference.copy(externalId = id))
+    } else {
+      referenceGen(generation)
+    }
+  }
+
+  /**
+   * Next event of a history: a new reference, a duplicate of an earlier reference, or
+   * (when something is left to delete) a deletion, chosen uniformly among the available options.
+   *
+   * @param previousEvents history so far, newest-first
+   * @param generation     generation the new event is tagged with
+   */
+  def eventGen(previousEvents: Seq[Event], generation: Generation): Gen[Event] = {
+    val addEvents = previousEvents.collect { case reference: Reference => reference }
+    // Gen.oneOf rejects an empty collection, so a history without any Reference
+    // simply has no duplicate candidate instead of failing.
+    val duplicateAddEventGen: Gen[Option[Reference]] =
+      if (addEvents.isEmpty) {
+        Gen.const(None)
+      } else {
+        Gen.oneOf(addEvents)
+          .flatMap(randomAddEvent => duplicateReferenceGen(generation, randomAddEvent))
+          .map(Some(_))
+      }
+    for {
+      greenAddEvent <- referenceGen(generation)
+      duplicateAddEvent <- duplicateAddEventGen
+      deleteEvent <- deletionGen(previousEvents, generation)
+      event <- Gen.oneOf(Seq[Event](greenAddEvent) ++ duplicateAddEvent ++ deleteEvent)
+    } yield event
+  }
+
+  /**
+   * A whole history in chronological order.
+   *
+   * It always starts with one reference in `Generation(0)`, followed by 0 to 100
+   * further events whose generations come from [[generationsGen]].
+   */
+  def eventsGen() : Gen[Seq[Event]] = for {
+    nbEvents <- Gen.choose(0, 100)
+    generations <- generationsGen.map(_.take(nbEvents))
+    startEvent <- referenceGen(Generation.apply(0))
+    // Events are prepended while folding, so the accumulator is newest-first.
+    events <- foldM(generations, (Seq(startEvent): Seq[Event]))((previousEvents, generation) => eventGen(previousEvents, generation).map(_ +: previousEvents))
+  } yield events.reverse
+
+  /**
+   * Left fold over `fa` where each step is itself a generator.
+   *
+   * @param fa elements to consume, in order
+   * @param z  initial accumulator
+   * @param f  produces the generator of the next accumulator
+   * @return a generator of the accumulator obtained once `fa` is exhausted
+   */
+  def foldM[A, B](fa: LazyList[A], z: B)(f: (B, A) => Gen[B]): Gen[B] = {
+    // Left = keep folding with the remaining elements, Right = final result.
+    def step(in: (LazyList[A], B)): Gen[Either[(LazyList[A], B), B]] = {
+      val (s, b) = in
+      if (s.isEmpty)
+        Gen.const(Right(b))
+      else {
+        f (b, s.head).map { bnext =>
+          Left((s.tail, bnext))
+        }
+      }
+    }
+
+    Gen.tailRecM((fa, z))(step)
+  }
+}
+
+/**
+ * Placeholder suite: draws one sample history from `Generators.eventsGen()` and prints
+ * each event on its own line. It makes no assertion.
+ */
+class GCPropertiesTest extends AnyFunSuite {
+  test("print sample") {
+    // `sample` yields an Option; nothing is printed when no value could be generated.
+    val sampledHistory = Generators.eventsGen().sample
+    sampledHistory.foreach { events =>
+      events.foreach(event => println(event))
+    }
+  }
+}
diff --git a/server/blob/pom.xml b/server/blob/pom.xml
index 4351178..9744471 100644
--- a/server/blob/pom.xml
+++ b/server/blob/pom.xml
@@ -34,6 +34,7 @@
<modules>
<module>blob-api</module>
+ <module>blob-deduplicating</module>
<module>blob-cassandra</module>
<module>blob-common</module>
<module>blob-export-api</module>
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org