You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2020/07/03 02:03:56 UTC
[james-project] 01/15: JAMES-3150 Add debugging support for the
garbage collection
This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 3728fae8a8a978959a849257c2a5fce99e7defdd
Author: Gautier DI FOLCO <gd...@linagora.com>
AuthorDate: Wed Apr 15 15:56:47 2020 +0200
JAMES-3150 Add debugging support for the garbage collection
---
server/blob/blob-deduplicating/pom.xml | 5 +
.../james/server/blob/deduplication/GC.scala | 12 +-
.../server/blob/deduplication/GCJsonReporter.scala | 170 ++++++++++++++++++
.../src/test/resources/gcReport.json | 63 +++++++
.../blob/deduplication/GCJsonReporterTest.scala | 190 +++++++++++++++++++++
.../blob/deduplication/GCPropertiesTest.scala | 38 +----
.../james/server/blob/deduplication/State.scala | 37 ++++
7 files changed, 476 insertions(+), 39 deletions(-)
diff --git a/server/blob/blob-deduplicating/pom.xml b/server/blob/blob-deduplicating/pom.xml
index ada4371..0bf6147 100644
--- a/server/blob/blob-deduplicating/pom.xml
+++ b/server/blob/blob-deduplicating/pom.xml
@@ -67,6 +67,11 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>com.typesafe.play</groupId>
+ <artifactId>play-json_${scala.base}</artifactId>
+ <version>2.8.1</version>
+ </dependency>
+ <dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
diff --git a/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GC.scala b/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GC.scala
index 0fa4ea8..1450e5f 100644
--- a/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GC.scala
+++ b/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GC.scala
@@ -42,6 +42,8 @@ sealed abstract class Generation extends Comparable[Generation] {
def <=(that: Generation): Boolean = compareTo(that) <= 0
def >(that: Generation): Boolean = compareTo(that) > 0
def >=(that: Generation): Boolean = compareTo(that) >= 0
+
+ def asString: String
}
object Generation {
@@ -76,6 +78,7 @@ case class ValidGeneration(id: Long) extends Generation {
case that: ValidGeneration => id.compareTo(that.id)
}
+ override def asString: String = id.toString
}
/**
@@ -90,6 +93,8 @@ case object NonExistingGeneration extends Generation {
case NonExistingGeneration => 0
case _: ValidGeneration => -1
}
+
+ override def asString: String = "non_existing"
}
/**
@@ -97,6 +102,7 @@ case object NonExistingGeneration extends Generation {
*/
case class Iteration(id: Long, processedGenerations: Set[Generation], lastGeneration: Generation) {
def next(generations: Set[Generation], lastGeneration: Generation): Iteration = Iteration(id + 1, generations, lastGeneration)
+ def asString: String = id.toString // decimal rendering of the iteration id; result type made explicit, as on Generation.asString
}
object Iteration {
@@ -129,7 +135,7 @@ object Events {
}
-case class Report(iteration: Iteration, blobsToDelete: Set[(Generation, BlobId)])
+case class GCIterationReport(iteration: Iteration, blobsToDelete: Set[(Generation, BlobId)])
/**
* Accessors to the References/Dereferences made by generations
@@ -173,7 +179,7 @@ case class StabilizedState(references: Map[Generation, Seq[Reference]], derefere
object GC {
val temporization: Long = 2
- def plan(state: StabilizedState, lastIteration: Iteration, targetedGeneration: Generation): Report = {
+ def plan(state: StabilizedState, lastIteration: Iteration, targetedGeneration: Generation): GCIterationReport = {
val processedGenerations = lastIteration.lastGeneration.collectibles(targetedGeneration)
val blobsToDelete = state.dereferences
.filter { case (generation, _) => processedGenerations.contains(generation) }
@@ -182,6 +188,6 @@ object GC {
.filter(dereference => state.referencesAt(processedGenerations.max).isNotReferenced(dereference.reference.blobId))
.map(dereference => (dereference.reference.generation, dereference.reference.blobId))
- Report(lastIteration.next(processedGenerations, targetedGeneration.previous(temporization)), blobsToDelete)
+ GCIterationReport(lastIteration.next(processedGenerations, targetedGeneration.previous(temporization)), blobsToDelete)
}
}
diff --git a/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GCJsonReporter.scala b/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GCJsonReporter.scala
new file mode 100644
index 0000000..a37ec36
--- /dev/null
+++ b/server/blob/blob-deduplicating/src/main/scala/org/apache/james/server/blob/deduplication/GCJsonReporter.scala
@@ -0,0 +1,170 @@
+/***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+package org.apache.james.server.blob.deduplication
+
+import org.apache.james.blob.api.BlobId
+import org.apache.james.server.blob.deduplication.RelatedAction.{Delete, GarbageCollect, Init, Save}
+import play.api.libs.json.{JsString, Json, Writes}
+
+import scala.collection.immutable.TreeSet
+
+
+sealed trait RelatedAction // the operation that produced a reported state; serialized under the "related-action" key
+object RelatedAction {
+ case object Init extends RelatedAction // marks the initial state, before any event was replayed
+ case class Save(blobId: BlobId, reference: ExternalID) extends RelatedAction // a reference to blobId was added
+ case class Delete(reference: ExternalID) extends RelatedAction // a reference was removed
+ case object GarbageCollect extends RelatedAction // a GC iteration was applied
+}
+
+object JsonReport {
+ case class BlobId(id : String, `reference-generation`: Generation) // a stored blob; backticked field names are the JSON keys
+
+ case class Reference(id : String, blob: String, `reference-generation`: Generation) // id is the external id, blob the blob id string
+ case class Dereference(blob: String, `reference-generation`: Generation, `garbage-collection-iterations`: String) // last field holds a single iteration id
+
+ case class State(`related-action` : RelatedAction, // what led to this state
+ `reference-generations`: TreeSet[Generation], // every generation seen so far, sorted
+ `garbage-collection-iterations`: TreeSet[String], // iteration ids as strings; NOTE(review): sorted lexicographically, so "10" sorts before "2"
+ blobs: Seq[BlobId],
+ references: Seq[Reference],
+ deletions: Seq[Dereference])
+
+
+ // Hand-written writer: every related action is rendered as a single human-readable JSON string
+ implicit val relatedActionWrites: Writes[RelatedAction] = {
+ case Init => JsString("init")
+ case Save(blobId, reference) => JsString(s"save(blob = ${blobId.asString()}, reference = ${reference.id})")
+ case Delete(reference) => JsString(s"delete(reference = ${reference.id})")
+ case GarbageCollect => JsString(s"garbageCollect")
+ }
+ // Hand-written writer: a generation is rendered as a JSON string. NOTE(review): "nonExistingGen" differs from NonExistingGeneration.asString ("non_existing") - confirm this is intended
+ implicit val generationWrites: Writes[Generation] = {
+ case ValidGeneration(id) => JsString(s"$id")
+ case NonExistingGeneration => JsString(s"nonExistingGen")
+ }
+ // Macro-derived writer for JsonReport.BlobId (relies on generationWrites above)
+ implicit val blobIdWrites: Writes[BlobId] = Json.writes[BlobId]
+ // Macro-derived writer for JsonReport.Reference
+ implicit val referenceWrites: Writes[Reference] = Json.writes[Reference]
+ // Macro-derived writer for JsonReport.Dereference
+ implicit val dereferenceWrites: Writes[Dereference] = Json.writes[Dereference]
+ // Macro-derived writer for JsonReport.State (relies on all the writers above)
+ implicit val stateWrites: Writes[State] = Json.writes[State]
+ // Macro-derived writer for the whole report: an object with a single "states" array
+ implicit val reportWrites: Writes[JsonReport] = Json.writes[JsonReport]
+
+}
+
+case class JsonReport(states: Seq[JsonReport.State]) // the successive states of the report, oldest first
+
+
+sealed trait EventToReport // input of the GC report: the referencing events plus the GC iteration results
+case class ReferenceEvent(event : Reference) extends EventToReport
+case class DereferenceEvent(event : Dereference) extends EventToReport
+case class GCIterationEvent(event : GCIterationReport) extends EventToReport
+
+object EventToReport {
+  // Unwraps the reference/dereference events, in order; GC iteration events carry none and are dropped.
+  def extractReferencingEvents(events: Seq[EventToReport]): Seq[Event] = events.flatMap(referencingEvent)
+  // Wraps each referencing event into its reportable counterpart, preserving the order.
+  def toReportEvents(events: Seq[Event]): Seq[EventToReport] = events.map {
+    case added: Reference => ReferenceEvent(added)
+    case removed: Dereference => DereferenceEvent(removed)
+  }
+  private def referencingEvent(wrapped: EventToReport): Option[Event] = wrapped match {
+    case ReferenceEvent(added) => Some(added)
+    case DereferenceEvent(removed) => Some(removed)
+    case GCIterationEvent(_) => None
+  }
+}
+
+object GCJsonReporter {
+
+  /**
+   * Replays the events in order and records the report state reached after each of them.
+   *
+   * @param events reference, dereference and GC iteration events, oldest first
+   * @return the initial state followed by one state per event
+   */
+  def report(events: Seq[EventToReport]): JsonReport = {
+    val initialState = JsonReport.State(Init,
+      TreeSet(Generation.first),
+      TreeSet(Iteration.initial.asString),
+      Seq[JsonReport.BlobId](),
+      Nil,
+      Nil)
+
+    // Every state derives from the previous one alone, so the fold only carries the states built so far.
+    val states = events.foldLeft(Seq(initialState))((previousStates, event) => {
+      val state = event match {
+        case ReferenceEvent(reference) => stateForReference(previousStates.last, reference)
+        case DereferenceEvent(dereference) => stateForDereference(previousStates.last, dereference)
+        case GCIterationEvent(gcReport) => stateForGCIteration(previousStates.last, gcReport)
+      }
+      previousStates :+ state
+    })
+
+    JsonReport(states)
+  }
+
+  /** State after a GC iteration: the collected blobs, and the references/deletions targeting them, are dropped. */
+  private def stateForGCIteration(previousState: JsonReport.State, gcReport: GCIterationReport): JsonReport.State = {
+    val collectedBlobIds = gcReport.blobsToDelete.map(_._2).map(_.asString())
+    val collectedBlobs = gcReport.blobsToDelete.map { case (generation, blobId) => JsonReport.BlobId(blobId.asString(), generation) }.toSeq
+
+    JsonReport.State(GarbageCollect,
+      `reference-generations` = previousState.`reference-generations`,
+      `garbage-collection-iterations` = previousState.`garbage-collection-iterations` + gcReport.iteration.asString,
+      blobs = previousState.blobs.diff(collectedBlobs),
+      references = previousState.references.filterNot(reference => collectedBlobIds.contains(reference.blob)),
+      deletions = previousState.deletions.filterNot(deletion => collectedBlobIds.contains(deletion.blob)))
+  }
+
+  /** State after a dereference: the deletion is recorded together with the latest GC iteration known so far. */
+  private def stateForDereference(previousState: JsonReport.State, dereference: Dereference): JsonReport.State = {
+    val latest = latestIteration(previousState.`garbage-collection-iterations`)
+    val deletion = JsonReport.Dereference(dereference.blob.asString(), dereference.generation, latest)
+
+    JsonReport.State(Delete(dereference.externalId),
+      `reference-generations` = previousState.`reference-generations` + dereference.generation,
+      `garbage-collection-iterations` = previousState.`garbage-collection-iterations`,
+      blobs = previousState.blobs,
+      references = previousState.references,
+      deletions = previousState.deletions :+ deletion)
+  }
+
+  /** State after a reference: the blob is listed only once, the reference itself is always appended. */
+  private def stateForReference(previousState: JsonReport.State, add: Reference): JsonReport.State = {
+    val blob = JsonReport.BlobId(add.blobId.asString(), add.generation)
+    val blobs = if (previousState.blobs.contains(blob)) previousState.blobs else previousState.blobs :+ blob
+    val reference = JsonReport.Reference(add.externalId.id, add.blobId.asString(), add.generation)
+
+    JsonReport.State(Save(add.blobId, add.externalId),
+      previousState.`reference-generations` + add.generation,
+      previousState.`garbage-collection-iterations`,
+      blobs,
+      previousState.references :+ reference,
+      previousState.deletions)
+  }
+
+  // Iteration ids are decimal strings (Iteration.asString) kept in a TreeSet that sorts them lexicographically
+  // ("10" < "9"): the latest one must be picked numerically, `.last` is wrong from ten iterations onward.
+  private def latestIteration(iterations: TreeSet[String]): String = iterations.maxBy(_.toLong)
+}
diff --git a/server/blob/blob-deduplicating/src/test/resources/gcReport.json b/server/blob/blob-deduplicating/src/test/resources/gcReport.json
new file mode 100644
index 0000000..8c87a57
--- /dev/null
+++ b/server/blob/blob-deduplicating/src/test/resources/gcReport.json
@@ -0,0 +1,63 @@
+{
+ "states" : [ {
+ "related-action" : "init",
+ "reference-generations" : [ "0" ],
+ "garbage-collection-iterations" : [ "0" ],
+ "blobs" : [ ],
+ "references" : [ ],
+ "deletions" : [ ]
+ }, {
+ "related-action" : "save(blob = 0_myHash, reference = message1)",
+ "reference-generations" : [ "0" ],
+ "garbage-collection-iterations" : [ "0" ],
+ "blobs" : [ {
+ "id" : "0_myHash",
+ "reference-generation" : "0"
+ } ],
+ "references" : [ {
+ "id" : "message1",
+ "blob" : "0_myHash",
+ "reference-generation" : "0"
+ } ],
+ "deletions" : [ ]
+ }, {
+ "related-action" : "garbageCollect",
+ "reference-generations" : [ "0" ],
+ "garbage-collection-iterations" : [ "0", "1" ],
+ "blobs" : [ {
+ "id" : "0_myHash",
+ "reference-generation" : "0"
+ } ],
+ "references" : [ {
+ "id" : "message1",
+ "blob" : "0_myHash",
+ "reference-generation" : "0"
+ } ],
+ "deletions" : [ ]
+ }, {
+ "related-action" : "delete(reference = message1)",
+ "reference-generations" : [ "0", "1" ],
+ "garbage-collection-iterations" : [ "0", "1" ],
+ "blobs" : [ {
+ "id" : "0_myHash",
+ "reference-generation" : "0"
+ } ],
+ "references" : [ {
+ "id" : "message1",
+ "blob" : "0_myHash",
+ "reference-generation" : "0"
+ } ],
+ "deletions" : [ {
+ "blob" : "0_myHash",
+ "reference-generation" : "1",
+ "garbage-collection-iterations" : "1"
+ } ]
+ }, {
+ "related-action" : "garbageCollect",
+ "reference-generations" : [ "0", "1" ],
+ "garbage-collection-iterations" : [ "0", "1", "2" ],
+ "blobs" : [ ],
+ "references" : [ ],
+ "deletions" : [ ]
+ } ]
+}
\ No newline at end of file
diff --git a/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCJsonReporterTest.scala b/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCJsonReporterTest.scala
new file mode 100644
index 0000000..c5961c2
--- /dev/null
+++ b/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCJsonReporterTest.scala
@@ -0,0 +1,190 @@
+/***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.server.blob.deduplication
+
+import java.time.Instant
+
+import org.apache.james.server.blob.deduplication.RelatedAction.{Delete, GarbageCollect, Init, Save}
+import org.apache.james.util.ClassLoaderUtils
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import play.api.libs.json.Json
+
+import scala.collection.immutable.TreeSet
+
+class GCJsonReporterTest extends AnyWordSpec with Matchers { // checks the states built by GCJsonReporter.report and their JSON rendering
+ "Report" should {
+ val generation = Generation.first
+ val blobId = GenerationAwareBlobId(generation, "myHash")
+ val externalId = ExternalID("message1")
+
+ val initialIteration = "0"
+ val firstIteration = "1"
+ val initialReport = JsonReport.State(Init, // state expected first in every report
+ `reference-generations` = TreeSet(Generation.first),
+ `garbage-collection-iterations` = TreeSet(initialIteration),
+ blobs = Seq[JsonReport.BlobId](),
+ references = Nil,
+ deletions = Nil)
+ val firstSaveReport = JsonReport.State(Save(blobId, externalId), // state expected after referencing blobId from externalId
+ `reference-generations` = TreeSet(generation),
+ `garbage-collection-iterations` = TreeSet(initialIteration),
+ blobs = Seq[JsonReport.BlobId](JsonReport.BlobId(blobId.asString, blobId.generation)),
+ references = Seq(JsonReport.Reference(externalId.id, blobId.asString, generation)),
+ deletions = Nil)
+ val firstDeleteReport = JsonReport.State(Delete(externalId), // state expected after dereferencing it in the same generation
+ `reference-generations` = TreeSet(generation),
+ `garbage-collection-iterations` = TreeSet(initialIteration),
+ blobs = Seq[JsonReport.BlobId](JsonReport.BlobId(blobId.asString, blobId.generation)),
+ references = Seq(JsonReport.Reference(externalId.id, blobId.asString, generation)),
+ deletions = Seq(JsonReport.Dereference(blobId.asString(), generation, initialIteration)))
+
+ val iterationForImmediateGC = Iteration(1L, Set(), generation) // iteration 1, with no processed generation
+ val gcReportImmediate = GCIterationReport(iterationForImmediateGC, Set()) // a GC report that deletes nothing
+
+ "be minimal" when {
+ "on initial state" in {
+ GCJsonReporter
+ .report(GCIterationEvent(gcReportImmediate) :: Nil)
+ .states should be (Seq(initialReport,
+ JsonReport.State(GarbageCollect,
+ `reference-generations` = TreeSet(Generation.first),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration),
+ blobs = Seq[JsonReport.BlobId](),
+ references = Nil,
+ deletions = Nil)))
+ }
+ }
+
+ "report with added references" when {
+ "one reference is added" in {
+ GCJsonReporter
+ .report(ReferenceEvent(Reference(externalId, blobId, generation)) :: GCIterationEvent(gcReportImmediate) :: Nil)
+ .states should be (Seq(
+ initialReport,
+ firstSaveReport,
+ JsonReport.State(GarbageCollect,
+ `reference-generations` = TreeSet(generation),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration),
+ blobs = Seq[JsonReport.BlobId](JsonReport.BlobId(blobId.asString, blobId.generation)),
+ references = Seq(JsonReport.Reference(externalId.id, blobId.asString, generation)),
+ deletions = Nil )))
+ }
+
+ "one reference is added then removed" in {
+ val reference = Reference(externalId, blobId, generation)
+ GCJsonReporter.report(ReferenceEvent(reference) :: DereferenceEvent(Dereference(generation, reference)) :: GCIterationEvent(gcReportImmediate) :: Nil)
+ .states should be (Seq(
+ initialReport,
+ firstSaveReport,
+ firstDeleteReport,
+ JsonReport.State(GarbageCollect, // gcReportImmediate deletes nothing: only the iteration set grows
+ `reference-generations` = TreeSet(generation),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration),
+ blobs = Seq[JsonReport.BlobId](JsonReport.BlobId(blobId.asString, blobId.generation)),
+ references = Seq(JsonReport.Reference(externalId.id, blobId.asString, generation)),
+ deletions = Seq(JsonReport.Dereference(blobId.asString(), generation, initialIteration)))))
+ }
+ }
+
+ "GC has been ran" when {
+ "report added and removed references" when {
+ "one reference is added then removed and the GC is ran 2 generation later" in {
+ val reference = Reference(externalId, blobId, generation)
+ val gcReportGenNPlus2 = GC.plan(StabilizedState(Map(generation -> List(reference)), Map(generation -> List(Dereference(generation, reference)))),
+ lastIteration = Iteration.initial,
+ targetedGeneration = generation.next(2))
+
+ GCJsonReporter.report(ReferenceEvent(reference) :: DereferenceEvent(Dereference(generation, reference)) :: GCIterationEvent(gcReportGenNPlus2) :: Nil)
+ .states should be (Seq(
+ initialReport,
+ firstSaveReport,
+ firstDeleteReport,
+ JsonReport.State(GarbageCollect, // the planned GC collects the blob: blobs, references and deletions end up empty
+ `reference-generations` = TreeSet(generation),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration),
+ blobs = Nil,
+ references = Nil,
+ deletions = Nil )))
+ }
+
+ "one reference is added, a gc run two generation later, then it is removed and the GC is ran again" in {
+ val reference = Reference(externalId, blobId, generation)
+ val gcReportGenNPlus2 = GC.plan(StabilizedState(Map(generation -> List(reference)), Map.empty),
+ lastIteration = Iteration.initial,
+ targetedGeneration = generation.next(2))
+
+ val generationPlusOne= generation.next
+ val dereference = Dereference(generation.next, reference)
+ val gcReportGenNPlus3 = GC.plan(StabilizedState(Map(generation -> List(reference)), Map(generationPlusOne -> List(dereference))),
+ lastIteration = gcReportGenNPlus2.iteration,
+ targetedGeneration = generationPlusOne.next(2))
+
+ GCJsonReporter.report(ReferenceEvent(reference) :: GCIterationEvent(gcReportGenNPlus2) :: DereferenceEvent(dereference) :: GCIterationEvent(gcReportGenNPlus3) :: Nil)
+ .states should be (Seq(
+ initialReport,
+ firstSaveReport,
+ //first gc
+ JsonReport.State(GarbageCollect,
+ `reference-generations` = TreeSet(generation),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration),
+ blobs = Seq[JsonReport.BlobId](JsonReport.BlobId(blobId.asString, blobId.generation)),
+ references = Seq(JsonReport.Reference(externalId.id, blobId.asString, generation)),
+ deletions = Nil),
+ //delete
+ JsonReport.State(Delete(externalId),
+ `reference-generations` = TreeSet(generation, generationPlusOne),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration),
+ blobs = Seq[JsonReport.BlobId](JsonReport.BlobId(blobId.asString, blobId.generation)),
+ references = Seq(JsonReport.Reference(externalId.id, blobId.asString, generation)),
+ deletions = Seq(JsonReport.Dereference(blobId.asString(), generationPlusOne, gcReportGenNPlus2.iteration.asString))),
+ //second gc
+ JsonReport.State(GarbageCollect,
+ `reference-generations` = TreeSet(generation, generationPlusOne),
+ `garbage-collection-iterations` = TreeSet(initialIteration, firstIteration, gcReportGenNPlus3.iteration.asString),
+ blobs = Nil,
+ references = Nil,
+ deletions = Nil)))
+ }
+
+
+
+ "json serialization" in {
+ val reference = Reference(externalId, blobId, generation)
+ val gcReportGenNPlus2 = GC.plan(StabilizedState(Map(generation -> List(reference)), Map.empty),
+ lastIteration = Iteration.initial,
+ targetedGeneration = generation.next(2))
+
+ val generationPlusOne= generation.next
+ val dereference = Dereference(generation.next, reference)
+ val gcReportGenNPlus3 = GC.plan(StabilizedState(Map(generation -> List(reference)), Map(generationPlusOne -> List(dereference))),
+ lastIteration = gcReportGenNPlus2.iteration,
+ targetedGeneration = generationPlusOne.next(2))
+
+ import JsonReport._ // brings the implicit Writes instances into scope for Json.toJson
+
+ val actualJson = Json.toJson(GCJsonReporter.report(ReferenceEvent(reference) :: GCIterationEvent(gcReportGenNPlus2) :: DereferenceEvent(dereference) :: GCIterationEvent(gcReportGenNPlus3) :: Nil))
+
+ actualJson should equal(Json.parse(ClassLoaderUtils.getSystemResourceAsString("gcReport.json"))) // same scenario as the previous test, compared with the JSON fixture
+ }
+ }
+ }
+ }
+}
diff --git a/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCPropertiesTest.scala b/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCPropertiesTest.scala
index ad90f1c..d2ca107 100644
--- a/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCPropertiesTest.scala
+++ b/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/GCPropertiesTest.scala
@@ -21,16 +21,11 @@ package org.apache.james.server.blob.deduplication
import java.nio.charset.StandardCharsets
import com.google.common.hash
-import org.apache.james.blob.api.BlobId
import org.apache.james.server.blob.deduplication.Generators.{OnePassGCTestParameters, TestParameters}
import org.scalacheck.Prop.forAll
import org.scalacheck.Test.Parameters
import org.scalacheck.{Arbitrary, Gen, Properties, Shrink}
-case class GenerationAwareBlobId(generation: Generation, hash: String) extends BlobId {
- override def asString(): String = s"${generation}_$hash"
-}
-
object Generators {
// generate a sequence of Generations with monotonic numeric ids
@@ -171,7 +166,7 @@ object GCPropertiesTest extends Properties("GC") {
property("2.1. GC should not delete data being referenced by a pending process or still referenced") = forAll {
testParameters: Generators.TestParameters => {
- val partitionedBlobsId = partitionBlobs(testParameters.events)
+ val partitionedBlobsId = Oracle.partitionBlobs(testParameters.events)
testParameters.generationsToCollect.foldLeft(true)((acc, e) => {
val plannedDeletions = GC.plan(Interpreter(testParameters.events).stabilize(), Iteration.initial, e).blobsToDelete.map(_._2)
acc && partitionedBlobsId.stillReferencedBlobIds.intersect(plannedDeletions).isEmpty
@@ -189,40 +184,11 @@ object GCPropertiesTest extends Properties("GC") {
val relevantEvents: Event => Boolean = event => event.generation <= testParameters.generationToCollect.previous(GC.temporization)
val plannedDeletions = plan.blobsToDelete.map(_._2)
- val partitionedBlobsId = partitionBlobs(testParameters.events.filter(relevantEvents))
+ val partitionedBlobsId = Oracle.partitionBlobs(testParameters.events.filter(relevantEvents))
plannedDeletions.size >= partitionedBlobsId.notReferencedBlobIds.size * 0.9
}
}
}
-
- /*
- Implement an oracle that implements BlobStore with a Ref Count reference tracking
- */
- def partitionBlobs(events: Seq[Event]): PartitionedEvents = {
- val (referencingEvents, dereferencingEvents) = events.partition {
- case _: Reference => true
- case _: Dereference => false
- }
-
- val referencedBlobsCount = referencingEvents.groupBy(_.blob).view.mapValues(_.size).toMap
- val dereferencedBlobsCount = dereferencingEvents.groupBy(_.blob).view.mapValues(_.size).toMap
-
- val stillReferencedBlobIds = referencedBlobsCount.foldLeft(Set[BlobId]())((acc, kv) => {
- val (blobId, referencesCount) = kv
- val dereferencesCount = dereferencedBlobsCount.getOrElse(blobId, 0)
-
- if(referencesCount > dereferencesCount)
- acc + blobId
- else
- acc
- })
-
- lazy val notReferencedBlobIds = dereferencedBlobsCount.keySet -- stillReferencedBlobIds
- PartitionedEvents(stillReferencedBlobIds, notReferencedBlobIds)
- }
-
- case class PartitionedEvents(stillReferencedBlobIds: Set[BlobId], notReferencedBlobIds: Set[BlobId])
-
}
diff --git a/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/State.scala b/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/State.scala
index 119d0cf..bcd9111 100644
--- a/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/State.scala
+++ b/server/blob/blob-deduplicating/src/test/scala/org/apache/james/server/blob/deduplication/State.scala
@@ -19,6 +19,14 @@
package org.apache.james.server.blob.deduplication
+import org.apache.james.blob.api.BlobId
+
+case class GenerationAwareBlobId(generation: Generation, hash: String) extends BlobId { // test BlobId made of a generation and a hash
+ override def asString(): String = s"${generation.asString}_$hash" // "<generation.asString>_<hash>"
+}
+
+case class PartitionedEvents(stillReferencedBlobIds: Set[BlobId], notReferencedBlobIds: Set[BlobId]) // result of Oracle.partitionBlobs
+
/**
* Used to iteratively build a StabilizedState
*/
@@ -44,3 +52,32 @@ object Interpreter {
def apply(events: Seq[Event]): State =
events.foldLeft(State.initial)((state, event) => state(event))
}
+
+object Oracle {
+  /*
+  Reference-counting oracle: a blob is still referenced while it has more references than dereferences;
+  a blob is not referenced any more when it was dereferenced at least once and is not still referenced.
+   */
+  def partitionBlobs(events: Seq[Event]): PartitionedEvents = {
+    // number of the given events per blob
+    def countByBlob(selected: Seq[Event]): Map[BlobId, Int] =
+      selected.groupBy(_.blob).view.mapValues(_.size).toMap
+
+    val isReference: Event => Boolean = {
+      case _: Reference => true
+      case _: Dereference => false
+    }
+
+    val referenceCounts = countByBlob(events.filter(isReference))
+    val dereferenceCounts = countByBlob(events.filterNot(isReference))
+
+    val stillReferenced: Set[BlobId] = referenceCounts
+      .collect { case (blobId, references) if references > dereferenceCounts.getOrElse(blobId, 0) => blobId }
+      .toSet
+
+    val notReferenced = dereferenceCounts.keySet -- stillReferenced
+
+    PartitionedEvents(stillReferenced, notReferenced)
+  }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org