You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@openwhisk.apache.org by cb...@apache.org on 2018/03/09 15:08:14 UTC
[incubator-openwhisk] branch master updated: Cleanup script for
unused entities in the whisks database. (#3382)
This is an automated email from the ASF dual-hosted git repository.
cbickel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git
The following commit(s) were added to refs/heads/master by this push:
new fbc0091 Cleanup script for unused entities in the whisks database. (#3382)
fbc0091 is described below
commit fbc009170295a23473b1ee390985d4b4dda4aad0
Author: Steffen Rost <lu...@freenet.de>
AuthorDate: Fri Mar 9 16:08:11 2018 +0100
Cleanup script for unused entities in the whisks database. (#3382)
python script to cleanup whisk db if namespace does not exist anymore
Co-authored-by: Christian Bickel <gi...@cbickel.de>
---
.../database/test/CleanUpWhisksDbSkriptTests.scala | 279 +++++++++++++++++++++
tools/db/cleanUpWhisks.py | 150 +++++++++++
2 files changed, 429 insertions(+)
diff --git a/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
new file mode 100644
index 0000000..1df1a48
--- /dev/null
+++ b/tests/src/test/scala/whisk/core/database/test/CleanUpWhisksDbSkriptTests.scala
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package whisk.core.database.test
+
+import java.io.File
+import java.time.Instant
+import java.time.temporal.ChronoUnit
+
+import common.{StreamLogging, TestUtils, WhiskProperties, WskActorSystem}
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+import org.scalatest.{FlatSpec, Matchers}
+import pureconfig.loadConfigOrThrow
+import spray.json._
+import spray.json.DefaultJsonProtocol._
+import whisk.core.database.CouchDbConfig
+//import whisk.core.{ConfigKeys, WhiskConfig}
+import whisk.core.ConfigKeys
+import whisk.core.entity._
+
+/**
+ * Tests for the `tools/db/cleanUpWhisks.py` script.
+ *
+ * Every test creates its own database, stores a few documents in it, runs the script against that
+ * database and then checks the ids printed by the script as well as the rows that are left in the
+ * database.
+ */
+@RunWith(classOf[JUnitRunner])
+class CleanUpWhisksDbSkriptTests
+    extends FlatSpec
+    with Matchers
+    with DatabaseScriptTestUtils
+    with WskActorSystem
+    with StreamLogging {
+
+  val cleanupScript = WhiskProperties.getFileRelativeToWhiskHome("tools/db/cleanUpWhisks.py").getAbsolutePath
+  val dbConfig = loadConfigOrThrow[CouchDbConfig](ConfigKeys.couchdb)
+  val authDBName = dbConfig.databaseFor[WhiskAuth]
+
+  /**
+   * Runs the cleanup script with `--days 1` and `--docsPerRequest 1` and parses its standard output.
+   *
+   * @param dbUrl value passed as `--dbUrl`
+   * @param whisksDbName value passed as `--dbNameWhisks`
+   * @param subjectsDbName value passed as `--dbNameSubjects`
+   * @return the document ids printed with the prefixes `marking: `, `deleting: `, `skipping: ` and
+   *         `keeping: `, in that order
+   */
+  def runScript(dbUrl: String,
+                whisksDbName: String,
+                subjectsDbName: String): (List[String], List[String], List[String], List[String]) = {
+    println(s"Running script: $dbUrl, $whisksDbName, $subjectsDbName")
+
+    val cmd =
+      Seq(
+        python,
+        cleanupScript,
+        "--dbUrl",
+        dbUrl,
+        "--dbNameWhisks",
+        whisksDbName,
+        "--dbNameSubjects",
+        subjectsDbName,
+        "--days",
+        "1",
+        "--docsPerRequest",
+        "1")
+
+    val rr = TestUtils.runCmd(0, new File("."), cmd: _*)
+
+    // Only the leading prefix is removed, so an id that happens to contain the prefix text stays intact.
+    def idsReportedAs(linePrefix: String): List[String] =
+      rr.stdout.lines.collect {
+        case line if line.startsWith(linePrefix) => line.stripPrefix(linePrefix)
+      }.toList
+
+    val marked = idsReportedAs("marking: ")
+    val deleted = idsReportedAs("deleting: ")
+    val skipped = idsReportedAs("skipping: ")
+    val kept = idsReportedAs("keeping: ")
+
+    println(s"marked: $marked")
+    println(s"deleted: $deleted")
+    println(s"skipped: $skipped")
+    println(s"kept: $kept")
+
+    (marked, deleted, skipped, kept)
+  }
+
+  /**
+   * Creates the database `dbName`, stores `documents` in it, runs the cleanup script against it and
+   * reads back the rows that are left afterwards. The database is deleted again even if one of these
+   * steps fails, so a failing test does not leave its database behind.
+   *
+   * @param dbName name of the database to create
+   * @param documents documents to store, keyed by document id
+   * @return the result of [[runScript]] and the `rows` of the `getAllDocs` response
+   */
+  private def runScriptOnFreshDb(
+    dbName: String,
+    documents: Map[String, JsObject]): ((List[String], List[String], List[String], List[String]), List[JsObject]) = {
+    val client = createDatabase(dbName, None)
+    try {
+      documents.foreach {
+        case (id, document) =>
+          client.putDoc(id, document).futureValue
+      }
+
+      val scriptOutput = runScript(dbUrl, dbName, authDBName)
+
+      val databaseResponse = client.getAllDocs(includeDocs = Some(true)).futureValue
+      databaseResponse should be('right)
+      val rows = databaseResponse.right.get.fields("rows").convertTo[List[JsObject]]
+
+      (scriptOutput, rows)
+    } finally {
+      client.deleteDb().futureValue
+    }
+  }
+
+  /** A `markedForDeletion` timestamp that lies eight days in the past, i.e. older than the `--days 1` limit. */
+  private def markedEightDaysAgo: JsNumber = JsNumber(Instant.now().minus(8, ChronoUnit.DAYS).toEpochMilli)
+
+  /** Extracts the `id` field of every row. */
+  private def rowIds(rows: List[JsObject]): List[String] = rows.map(_.fields("id").convertTo[String])
+
+  behavior of "Cleanup whisksDb script"
+
+  it should "mark documents for deletion if namespace does not exist" in {
+    // Documents/actions with a random namespace
+    val documents = Map(
+      "whisksCleanTests/utils/actionName1" -> JsObject("namespace" -> JsString("whisksCleanTests/utils")),
+      "whisksCleanTests/utils/actionName2" -> JsObject("namespace" -> JsString("whisksCleanTests")),
+      "whisksCleanTests/actionName3" -> JsObject("namespace" -> JsString("whisksCleanTests")))
+
+    val ((marked, _, _, _), remainingRows) =
+      runScriptOnFreshDb(dbPrefix + "cleanup_whisks_test_mark_for_deletion", documents)
+
+    // The script must report every document as marked and must have written the marker to the database
+    marked should contain allElementsOf documents.keys
+    remainingRows.foreach { row =>
+      row.fields("doc").asJsObject.fields.keys should contain("markedForDeletion")
+    }
+  }
+
+  it should "delete marked for deletion documents if namespace does not exists" in {
+    // Documents/actions with a random namespace and a markedForDeletion field
+    val documents = Map(
+      "whisksCleanTests/utils/actionName1" -> JsObject(
+        "namespace" -> JsString("whisksCleanTests/utils"),
+        "markedForDeletion" -> markedEightDaysAgo),
+      "whisksCleanTests/utils/actionName2" -> JsObject(
+        "namespace" -> JsString("whisksCleanTests"),
+        "markedForDeletion" -> markedEightDaysAgo),
+      "whisksCleanTests/actionName3" -> JsObject(
+        "namespace" -> JsString("whisksCleanTests"),
+        "markedForDeletion" -> markedEightDaysAgo))
+
+    val ((marked, deleted, _, _), remainingRows) =
+      runScriptOnFreshDb(dbPrefix + "cleanup_whisks_test_delete_mark_for_deletion", documents)
+
+    // Already marked documents must be deleted, not marked again
+    marked shouldBe empty
+    deleted should contain allElementsOf documents.keys
+    remainingRows shouldBe empty
+  }
+
+  it should "not mark documents for deletion if namespace does exist" in {
+    // Documents/actions with the whisk.system namespace
+    val documents = Map(
+      "whisk.system/utils" -> JsObject("namespace" -> JsString("whisk.system")),
+      "whisk.system/samples/helloWorld" -> JsObject("namespace" -> JsString("whisk.system/samples")),
+      "whisk.system/utils/namespace" -> JsObject("namespace" -> JsString("whisk.system/utils")))
+
+    val ((_, _, _, kept), remainingRows) =
+      runScriptOnFreshDb(dbPrefix + "cleanup_whisks_test_not_mark_for_deletion", documents)
+
+    // The script must report the documents as kept and must leave them in the database
+    kept should contain allElementsOf documents.keys
+    rowIds(remainingRows) should contain allElementsOf documents.keys
+  }
+
+  it should "skip design documents" in {
+    // Design documents
+    val documents = Map(
+      "_design/all-whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")),
+      "_design/snapshotFilters" -> JsObject("language" -> JsString("javascript")),
+      "_design/whisks.v2.1.0" -> JsObject("language" -> JsString("javascript")))
+
+    val ((_, _, skipped, _), remainingRows) =
+      runScriptOnFreshDb(dbPrefix + "cleanup_whisks_test_skip_design_documents", documents)
+
+    // The script must report the design documents as skipped and must leave them in the database
+    skipped should contain allElementsOf documents.keys
+    rowIds(remainingRows) should contain allElementsOf documents.keys
+  }
+
+  it should "not delete marked for deletion documents if namespace does exists" in {
+    // Documents/actions with the whisk.system namespace and a markedForDeletion field
+    val documents = Map(
+      "whisk.system/utils" -> JsObject(
+        "namespace" -> JsString("whisk.system"),
+        "markedForDeletion" -> markedEightDaysAgo),
+      "whisk.system/samples/helloWorld" -> JsObject(
+        "namespace" -> JsString("whisk.system/samples"),
+        "markedForDeletion" -> markedEightDaysAgo),
+      "whisk.system/utils/namespace" -> JsObject(
+        "namespace" -> JsString("whisk.system/utils"),
+        "markedForDeletion" -> markedEightDaysAgo))
+
+    val ((_, _, _, kept), remainingRows) =
+      runScriptOnFreshDb(dbPrefix + "cleanup_whisks_test_not_delete_mark_for_deletion", documents)
+
+    // The script must report the documents as kept and must leave them in the database
+    kept should contain allElementsOf documents.keys
+    rowIds(remainingRows) should contain allElementsOf documents.keys
+  }
+}
diff --git a/tools/db/cleanUpWhisks.py b/tools/db/cleanUpWhisks.py
new file mode 100755
index 0000000..83b47bb
--- /dev/null
+++ b/tools/db/cleanUpWhisks.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+"""Python script to delete whisks entries having a non-existent namespace.
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+"""
+
+import argparse
+import time
+import couchdb.client
+
+skipWhisks = 0
+
+try:
+ long # Python 2
+except NameError:
+ long = int # Python 3
+
+HOUR = 1000 * 60 * 60
+DAY = HOUR * 24
+
+#
+# simple ring buffer like list
+#
+class SimpleRingBuffer:
+ def __init__(self, size):
+ self.index = -1
+ self.data = []
+ self.maxsize = size * 2
+
+ def append(self, ns, bool):
+
+ self.index=(self.index+2)%self.maxsize
+
+ if len(self.data) < self.maxsize:
+ self.data.append(ns)
+ self.data.append(bool)
+ else:
+ self.data[self.index-1]=ns
+ self.data[(self.index)]=bool
+
+ def getself(self):
+ return self.data
+
+ def get(self, ns):
+ if ns in self.data:
+ return self.data[self.data.index(ns)+1]
+ else:
+ return None
+
+#
+# mark whisks entry for deletion of delete if already marked
+#
+def deleteWhisk(dbWhisks, wdoc):
+
+ global skipWhisks
+
+ wdocd = dbWhisks[wdoc['id']]
+ if not 'markedForDeletion' in wdocd:
+ print('marking: {0}'.format(wdoc['id']))
+ dts = int(time.time() * 1000)
+ wdocd['markedForDeletion'] = dts
+ dbWhisks.save(wdocd)
+ else:
+ dts = wdocd['markedForDeletion']
+ now = int(time.time() * 1000)
+ elapsedh = int((now - dts) / HOUR)
+ elapsedd = int((now - dts) / DAY)
+
+ if elapsedd >= args.days:
+ print('deleting: {0}'.format(wdoc['id']))
+ dbWhisks.delete(wdocd)
+ skipWhisks-=1
+ else:
+ print('marked: {0}, elapsed hours: {1}, elapsed days: {2}'.format(wdoc['id'], elapsedh, elapsedd))
+
+
+#
+# check subjects db for existence of ns
+#
+def checkNamespace(dbSubjects, namespace):
+
+ while True:
+
+ allNamespaces = dbSubjects.view('subjects/identities', startkey=[namespace], endkey=[namespace])
+
+ if allNamespaces:
+ return True
+ else:
+ return False
+
+
+#
+# check whisks db for entries having none existent ns
+#
+def checkWhisks(args):
+
+ dbWhisks = couchdb.client.Server(args.dbUrl)[args.dbNameWhisks]
+ dbSubjects = couchdb.client.Server(args.dbUrl)[args.dbNameSubjects]
+
+ rb = SimpleRingBuffer(args.bufferLen)
+
+ global skipWhisks
+ while True:
+ allWhisks = dbWhisks.view('_all_docs', limit=args.docsPerRequest, skip=skipWhisks)
+ skipWhisks += args.docsPerRequest
+ if allWhisks:
+ for wdoc in allWhisks:
+ if wdoc['id'].startswith('_design/'):
+ print('skipping: {0}'.format(wdoc['id']))
+ continue
+ namespace = wdoc['id'][0:wdoc['id'].find('/')]
+
+ exists = rb.get(namespace)
+ if exists == None:
+ exists = checkNamespace(dbSubjects, namespace)
+ rb.append(namespace, exists)
+
+ if exists:
+ print('keeping: {0}'.format(wdoc['id']))
+ else:
+ deleteWhisk(dbWhisks, wdoc)
+ else:
+ return
+
+
+parser = argparse.ArgumentParser(description="Utility to mark/delete whisks entries where the ns does not exist in the subjects database.")
+parser.add_argument("--dbUrl", required=True, help="Server URL of the database, that has to be cleaned of old activations. E.g. 'https://xxx:yyy@domain.couch.com:443'")
+parser.add_argument("--dbNameWhisks", required=True, help="Name of the Whisks Database of the whisks entries to be marked for deletion or deleted if already marked.")
+parser.add_argument("--dbNameSubjects", required=True, help="Name of the Subjects Database.")
+parser.add_argument("--days", required=True, type=int, default=7, help="How many days whisks keep entries marked for deletion before deleting them.")
+parser.add_argument("--docsPerRequest", type=int, default=200, help="Number of documents handled on each CouchDb Request. Default is 200.")
+parser.add_argument("--bufferLen", type=int, default=100, help="Maximum buffer length to cache already checked ns. Default is 100.")
+args = parser.parse_args()
+
+checkWhisks(args)
--
To stop receiving notification emails like this one, please contact
cbickel@apache.org.