You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@openwhisk.apache.org by GitBox <gi...@apache.org> on 2018/03/09 15:08:13 UTC

[GitHub] cbickel closed pull request #3382: Cleanup script for unused entities in the whisks database.

cbickel closed pull request #3382: Cleanup script for unused entities in the whisks database.
URL: https://github.com/apache/incubator-openwhisk/pull/3382
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
new file mode 100644
index 0000000000..1df1a485ec
--- /dev/null
+++ b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package whisk.core.database.test
+
+import java.io.File
+import java.time.Instant
+import java.time.temporal.ChronoUnit
+
+import common.{StreamLogging, TestUtils, WhiskProperties, WskActorSystem}
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+import org.scalatest.{FlatSpec, Matchers}
+import pureconfig.loadConfigOrThrow
+import spray.json._
+import spray.json.DefaultJsonProtocol._
+import whisk.core.database.CouchDbConfig
+//import whisk.core.{ConfigKeys, WhiskConfig}
+import whisk.core.ConfigKeys
+import whisk.core.entity._
+
+/**
+ * Tests for `tools/db/cleanUpWhisks.py`.
+ *
+ * Every test creates its own whisks database, fills it with hand-written documents, runs the
+ * script against it and then inspects both the script output and the database content. The
+ * database is removed in a `finally` block so that a failing assertion does not leave it behind.
+ */
+@RunWith(classOf[JUnitRunner])
+class CleanUpWhisksDbSkriptTests
+    extends FlatSpec
+    with Matchers
+    with DatabaseScriptTestUtils
+    with WskActorSystem
+    with StreamLogging {
+
+  // Absolute path of the Python script under test.
+  val cleanupScript = WhiskProperties.getFileRelativeToWhiskHome("tools/db/cleanUpWhisks.py").getAbsolutePath
+  val dbConfig = loadConfigOrThrow[CouchDbConfig](ConfigKeys.couchdb)
+  // Name of the database handed to the script as `--dbNameSubjects`.
+  val authDBName = dbConfig.databaseFor[WhiskAuth]
+
+  /**
+   * A `markedForDeletion` value (epoch millis) lying eight days in the past, i.e. older than the
+   * `--days 1` threshold that [[runScript]] passes to the script.
+   */
+  private def staleDeletionMark: JsNumber = JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)
+
+  /**
+   * Extracts the `rows` array of an "all docs" response, failing the test if the request itself
+   * did not succeed.
+   */
+  private def documentRows(response: Either[Any, JsObject]): List[JsObject] = response match {
+    case Right(body) => body.fields("rows").convertTo[List[JsObject]]
+    case Left(error) => fail(s"reading all documents failed: $error")
+  }
+
+  /**
+   * Runs the cleanup script with `--days 1` and `--docsPerRequest 1` and parses its output.
+   *
+   * @param dbUrl          value passed as `--dbUrl`
+   * @param whisksDbName   value passed as `--dbNameWhisks`
+   * @param subjectsDbName value passed as `--dbNameSubjects`
+   * @return the document ids printed by the script, grouped by the line prefix they were printed
+   *         with: (`marking: `, `deleting: `, `skipping: `, `keeping: `)
+   */
+  def runScript(
+    dbUrl: String,
+    whisksDbName: String,
+    subjectsDbName: String): (List[String], List[String], List[String], List[String]) = {
+    println(s"Running script: $dbUrl, $whisksDbName, $subjectsDbName")
+
+    val cmd =
+      Seq(
+        python,
+        cleanupScript,
+        "--dbUrl",
+        dbUrl,
+        "--dbNameWhisks",
+        whisksDbName,
+        "--dbNameSubjects",
+        subjectsDbName,
+        "--days",
+        "1",
+        "--docsPerRequest",
+        "1")
+
+    // NOTE(review): the leading 0 is presumably the expected exit code - confirm against TestUtils.
+    val rr = TestUtils.runCmd(0, new File("."), cmd: _*)
+
+    val Seq(marked, deleted, skipped, kept) =
+      Seq("marking: ", "deleting: ", "skipping: ", "keeping: ").map { linePrefix =>
+        rr.stdout.lines.collect {
+          // Only strip the leading prefix; `replace` would also remove the same text if it
+          // happened to occur inside the document id.
+          case line if line.startsWith(linePrefix) => line.stripPrefix(linePrefix)
+        }.toList
+      }
+
+    println(s"marked:  $marked")
+    println(s"deleted: $deleted")
+    println(s"skipped: $skipped")
+    println(s"kept:    $kept")
+
+    (marked, deleted, skipped, kept)
+  }
+
+  behavior of "Cleanup whisksDb script"
+
+  it should "mark documents for deletion if namespace does not exist" in {
+    // Create whisks db
+    val dbName = dbPrefix + "cleanup_whisks_test_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    try {
+      // Create document/action with random namespace
+      val documents = Map(
+        "whisksCleanTests/utils/actionName1" -> JsObject("namespace" -> JsString("whisksCleanTests/utils")),
+        "whisksCleanTests/utils/actionName2" -> JsObject("namespace" -> JsString("whisksCleanTests")),
+        "whisksCleanTests/actionName3" -> JsObject("namespace" -> JsString("whisksCleanTests")))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (marked, _, _, _) = runScript(dbUrl, dbName, authDBName)
+
+      // Check, that script marked document to be deleted: output + document from DB
+      val ids = documents.keys
+      println(s"ids: $ids")
+      marked should contain allElementsOf ids
+
+      val databaseDocuments = documentRows(client.getAllDocs(includeDocs = Some(true)).futureValue)
+      databaseDocuments.foreach { doc =>
+        doc.fields("doc").asJsObject.fields.keys should contain("markedForDeletion")
+      }
+    } finally {
+      // Delete database, also when an assertion above failed
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "delete marked for deletion documents if namespace does not exists" in {
+    // Create whisks db
+    val dbName = dbPrefix + "cleanup_whisks_test_delete_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    try {
+      // Create document/action with random namespace and markedForDeletion field
+      val documents = Map(
+        "whisksCleanTests/utils/actionName1" -> JsObject(
+          "namespace" -> JsString("whisksCleanTests/utils"),
+          "markedForDeletion" -> staleDeletionMark),
+        "whisksCleanTests/utils/actionName2" -> JsObject(
+          "namespace" -> JsString("whisksCleanTests"),
+          "markedForDeletion" -> staleDeletionMark),
+        "whisksCleanTests/actionName3" -> JsObject(
+          "namespace" -> JsString("whisksCleanTests"),
+          "markedForDeletion" -> staleDeletionMark))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (marked, deleted, _, _) = runScript(dbUrl, dbName, authDBName)
+
+      // Check, that script deleted already marked documents: output + content of DB
+      val ids = documents.keys
+      println(s"ids: $ids")
+      marked shouldBe empty
+      deleted should contain allElementsOf ids
+
+      documentRows(client.getAllDocs(includeDocs = Some(true)).futureValue) shouldBe empty
+    } finally {
+      // Delete database, also when an assertion above failed
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "not mark documents for deletion if namespace does exist" in {
+    // Create whisks db
+    val dbName = dbPrefix + "cleanup_whisks_test_not_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    try {
+      // Create document/action with whisk-system namespace
+      val documents = Map(
+        "whisk.system/utils" -> JsObject("namespace" -> JsString("whisk.system")),
+        "whisk.system/samples/helloWorld" -> JsObject("namespace" -> JsString("whisk.system/samples")),
+        "whisk.system/utils/namespace" -> JsObject("namespace" -> JsString("whisk.system/utils")))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (_, _, _, kept) = runScript(dbUrl, dbName, authDBName)
+
+      // Check, that script did not mark documents for deletion
+      val ids = documents.keys
+      println(s"ids: $ids")
+      kept should contain allElementsOf ids
+
+      val databaseDocuments = documentRows(client.getAllDocs(includeDocs = Some(true)).futureValue)
+      val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String])
+      databaseDocumentIDs should contain allElementsOf ids
+    } finally {
+      // Delete database, also when an assertion above failed
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "skip design documents" in {
+    // Create whisks db
+    val dbName = dbPrefix + "cleanup_whisks_test_skip_design_documents"
+    val client = createDatabase(dbName, None)
+
+    try {
+      // Create design documents
+      val documents = Map(
+        "_design/all-whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")),
+        "_design/snapshotFilters" -> JsObject("language" -> JsString("javascript")),
+        "_design/whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (_, _, skipped, _) = runScript(dbUrl, dbName, authDBName)
+
+      // Check, that script skipped design documents
+      val ids = documents.keys
+      println(s"ids: $ids")
+      skipped should contain allElementsOf ids
+
+      val databaseDocuments = documentRows(client.getAllDocs(includeDocs = Some(true)).futureValue)
+      val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String])
+      databaseDocumentIDs should contain allElementsOf ids
+    } finally {
+      // Delete database, also when an assertion above failed
+      client.deleteDb().futureValue
+    }
+  }
+
+  it should "not delete marked for deletion documents if namespace does exists" in {
+    // Create whisks db
+    val dbName = dbPrefix + "cleanup_whisks_test_not_delete_mark_for_deletion"
+    val client = createDatabase(dbName, None)
+
+    try {
+      // Create document/action with whisk-system namespace and markedForDeletion field
+      val documents = Map(
+        "whisk.system/utils" -> JsObject(
+          "namespace" -> JsString("whisk.system"),
+          "markedForDeletion" -> staleDeletionMark),
+        "whisk.system/samples/helloWorld" -> JsObject(
+          "namespace" -> JsString("whisk.system/samples"),
+          "markedForDeletion" -> staleDeletionMark),
+        "whisk.system/utils/namespace" -> JsObject(
+          "namespace" -> JsString("whisk.system/utils"),
+          "markedForDeletion" -> staleDeletionMark))
+
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      // execute script
+      val (_, _, _, kept) = runScript(dbUrl, dbName, authDBName)
+
+      // Check, that script kept documents in DB
+      val ids = documents.keys
+      println(s"ids: $ids")
+      kept should contain allElementsOf ids
+
+      val databaseDocuments = documentRows(client.getAllDocs(includeDocs = Some(true)).futureValue)
+      val databaseDocumentIDs = databaseDocuments.map(_.fields("id").convertTo[String])
+      databaseDocumentIDs should contain allElementsOf ids
+    } finally {
+      // Delete database, also when an assertion above failed
+      client.deleteDb().futureValue
+    }
+  }
+}
diff --git a/tools/db/cleanUpWhisks.py b/tools/db/cleanUpWhisks.py
new file mode 100755
index 0000000000..83b47bb6a0
--- /dev/null
+++ b/tools/db/cleanUpWhisks.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+"""Python script to mark and delete whisks entries whose namespace does not exist.
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+"""
+
+import argparse
+import time
+import couchdb.client
+
+# Paging offset into the whisks `_all_docs` view. checkWhisks advances it by
+# docsPerRequest per request; deleteWhisk decrements it for every document it
+# deletes so that the following page does not jump over a document.
+skipWhisks = 0
+
+# Compatibility shim so that the name `long` exists on Python 3 as well.
+# NOTE(review): `long` is not referenced anywhere else in this script.
+try:
+    long        # Python 2
+except NameError:
+    long = int  # Python 3
+
+# Durations in milliseconds; timestamps in this script are `time.time() * 1000`.
+HOUR = 1000 * 60 * 60
+DAY = HOUR * 24
+
+#
+# simple ring buffer like list
+#
+class SimpleRingBuffer:
+    def __init__(self, size):
+        self.index = -1
+        self.data = []
+        self.maxsize = size * 2
+
+    def append(self, ns, bool):
+
+        self.index=(self.index+2)%self.maxsize
+
+        if len(self.data) < self.maxsize:
+            self.data.append(ns)
+            self.data.append(bool)
+        else:
+            self.data[self.index-1]=ns
+            self.data[(self.index)]=bool
+
+    def getself(self):
+        return self.data
+
+    def get(self, ns):
+        if ns in self.data:
+            return self.data[self.data.index(ns)+1]
+        else:
+            return None
+
+#
+# mark whisks entry for deletion or delete if already marked
+#
+def deleteWhisk(dbWhisks, wdoc):
+
+    global skipWhisks
+
+    wdocd = dbWhisks[wdoc['id']]
+    if not 'markedForDeletion' in wdocd:
+        print('marking: {0}'.format(wdoc['id']))
+        dts = int(time.time() * 1000)
+        wdocd['markedForDeletion'] = dts
+        dbWhisks.save(wdocd)
+    else:
+        dts = wdocd['markedForDeletion']
+        now = int(time.time() * 1000)
+        elapsedh = int((now - dts) / HOUR)
+        elapsedd = int((now - dts) / DAY)
+
+        if elapsedd >= args.days:
+            print('deleting: {0}'.format(wdoc['id']))
+            dbWhisks.delete(wdocd)
+            skipWhisks-=1
+        else:
+            print('marked: {0}, elapsed hours: {1}, elapsed days: {2}'.format(wdoc['id'], elapsedh, elapsedd))
+
+
+#
+# check subjects db for existence of ns
+#
+def checkNamespace(dbSubjects, namespace):
+
+    while True:
+
+        allNamespaces = dbSubjects.view('subjects/identities', startkey=[namespace], endkey=[namespace])
+
+        if allNamespaces:
+            return True
+        else:
+            return False
+
+
+#
+# check whisks db for entries having a non-existent ns
+#
+def checkWhisks(args):
+
+    dbWhisks = couchdb.client.Server(args.dbUrl)[args.dbNameWhisks]
+    dbSubjects = couchdb.client.Server(args.dbUrl)[args.dbNameSubjects]
+
+    rb = SimpleRingBuffer(args.bufferLen)
+
+    global skipWhisks
+    while True:
+        allWhisks = dbWhisks.view('_all_docs', limit=args.docsPerRequest, skip=skipWhisks)
+        skipWhisks += args.docsPerRequest
+        if allWhisks:
+            for wdoc in allWhisks:
+                if wdoc['id'].startswith('_design/'):
+                    print('skipping: {0}'.format(wdoc['id']))
+                    continue
+                namespace = wdoc['id'][0:wdoc['id'].find('/')]
+
+                exists = rb.get(namespace)
+                if exists == None:
+                    exists = checkNamespace(dbSubjects, namespace)
+                    rb.append(namespace, exists)
+
+                if exists:
+                    print('keeping: {0}'.format(wdoc['id']))
+                else:
+                    deleteWhisk(dbWhisks, wdoc)
+        else:
+            return
+
+
+parser = argparse.ArgumentParser(description="Utility to mark/delete whisks entries where the ns does not exist in the subjects database.")
+parser.add_argument("--dbUrl", required=True, help="Server URL of the database, that has to be cleaned of old activations. E.g. 'https://xxx:yyy@domain.couch.com:443'")
+parser.add_argument("--dbNameWhisks", required=True, help="Name of the Whisks Database of the whisks entries to be marked for deletion or deleted if already marked.")
+parser.add_argument("--dbNameSubjects", required=True, help="Name of the Subjects Database.")
+parser.add_argument("--days", required=True, type=int, default=7, help="How many days whisks keep entries marked for deletion before deleting them.")
+parser.add_argument("--docsPerRequest", type=int, default=200, help="Number of documents handled on each CouchDb Request. Default is 200.")
+parser.add_argument("--bufferLen", type=int, default=100, help="Maximum buffer length to cache already checked ns. Default is 100.")
+args = parser.parse_args()
+
+checkWhisks(args)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services