You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2017/06/06 05:10:19 UTC

svn commit: r1797727 - /jackrabbit/oak/trunk/oak-run/src/main/js/oak-mongo.js

Author: chetanm
Date: Tue Jun  6 05:10:19 2017
New Revision: 1797727

URL: http://svn.apache.org/viewvc?rev=1797727&view=rev
Log:
OAK-6307 - Function to find all large docs in Mongo

Modified:
    jackrabbit/oak/trunk/oak-run/src/main/js/oak-mongo.js

Modified: jackrabbit/oak/trunk/oak-run/src/main/js/oak-mongo.js
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/js/oak-mongo.js?rev=1797727&r1=1797726&r2=1797727&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/js/oak-mongo.js (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/js/oak-mongo.js Tue Jun  6 05:10:19 2017
@@ -214,6 +214,42 @@ var oak = (function(global){
     };
 
     /**
+     * Finds and dumps the _id of every document in the nodes collection whose
+     * BSON size exceeds 15MB. It also prints progress info after every 10k docs.
+     *
+     * The ids can be found by grepping the output for the '^id|' pattern
+     *
+     * > oak.dumpLargeDocIds({filename: "large-docs.json"})
+     *
+     * @param {object} options optional host, port and filename for the printed export command; db is always set to the current database
+     */
+    api.dumpLargeDocIds = function (options) {
+        var count = 0;
+        var ids = [];
+        db.nodes.find().forEach(function (doc) {
+            var size = Object.bsonsize(doc);
+            if (size > 15 * 1024 * 1024) {
+                print("id|" + doc._id);
+                ids.push(doc._id)
+            }
+            if (++count % 10000 === 0) {
+                print("Traversed #" + count)
+            }
+        });
+
+        print("Number of large documents : " + ids.length);
+
+        // Print the mongoexport command that exports all such large docs
+        if (ids.length > 0) {
+            var query = JSON.stringify({_id: {$in: ids}});
+            print("Using following export command to tweak the output");
+            options = options || {};
+            options.db = db.getName();
+            print(createExportCommand(query, options));
+        }
+    };
+
+    /**
      * Converts the given Revision String into a more human readable version,
      * which also prints the date.
      *
@@ -579,23 +615,7 @@ var oak = (function(global){
      */
 
     api.printMongoExportCommand = function (path, options) {
-        options = options || {};
-        var host = options.host || "127.0.0.1";
-        var port = options.port || "27017";
-        var db = options.db || "oak";
-        var filename = options.filename || "all-required-nodes.json"
-
-        var query = JSON.stringify(getDocAndHierarchyQuery(path));
-
-        var mongoExportCommand = "mongoexport"
-                                    + " --host " + host
-                                    + " --port " + port
-                                    + " --db " + db
-                                    + " --collection nodes"
-                                    + " --out " + filename
-                                    + " --query '" + query + "'";
-
-        return mongoExportCommand;
+        return createExportCommand(JSON.stringify(getDocAndHierarchyQuery(path)), options);
     };
 
     /**
@@ -641,6 +661,22 @@ var oak = (function(global){
 
     //~--------------------------------------------------< internal >
 
+    var createExportCommand = function (query, options) {
+        options = options || {};
+        var host = options.host || "127.0.0.1";
+        var port = options.port || "27017";
+        var db = options.db || "oak";
+        var filename = options.filename || "all-required-nodes.json"
+
+        return "mongoexport"
+            + " --host " + host
+            + " --port " + port
+            + " --db " + db
+            + " --collection nodes"
+            + " --out " + filename
+            + " --query '" + query + "'";
+    };
+
     var checkOrFixDeepHistory = function(path, fix, prepare, verbose) {
         if (prepare) {
             // not issuing any header at all