You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2020/06/02 21:31:08 UTC

[couchdb] 01/01: Improve efficiency of couch_jobs:accept for views

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch make-couch-view-job-accepts-faster
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit f5cfc1c2a608f576c78c37368b43e6a66c9c3d6e
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Tue Jun 2 17:20:19 2020 -0400

    Improve efficiency of couch_jobs:accept for views
    
    Use couch_jobs's `no_schedule` accept option to speed up job dequeuing.
    
    This optimization allows dequeuing jobs more efficiently if these conditions
    are met:
    
     1) Job IDs start with a random prefix
     2) No time-based scheduling is used
    
    Both of those can be true for views: job IDs can be generated such that the
    signature comes before the db name part, which is what this commit does.
    
    The way the optimisation works is that random IDs are generated in the
    pending jobs range, then a key selection is used to pick either a job before
    or after it. That reduces each dequeue attempt to just 1 read instead of
    reading up to 1000 jobs.
---
 src/couch_views/src/couch_views_indexer.erl       | 3 ++-
 src/couch_views/src/couch_views_jobs.erl          | 4 +++-
 src/couch_views/test/couch_views_cleanup_test.erl | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index bd1bd4d..4d09fdb 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -44,7 +44,8 @@ spawn_link() ->
 
 
 init() ->
-    {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, #{}),
+    Opts = #{no_schedule => true},
+    {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, Opts),
     Data = upgrade_data(Data0),
     #{
         <<"db_name">> := DbName,
diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl
index a9ca168..4b0aa26 100644
--- a/src/couch_views/src/couch_views_jobs.erl
+++ b/src/couch_views/src/couch_views_jobs.erl
@@ -134,7 +134,9 @@ job_id(#{name := DbName}, #mrst{sig = Sig}) ->
 
 job_id(DbName, Sig) ->
     HexSig = fabric2_util:to_hex(Sig),
-    <<DbName/binary, "-", HexSig/binary>>.
+    % Put signature first in order to be able to use the no_schedule
+    % couch_jobs:accept/2 option
+    <<HexSig/binary, "-", DbName/binary>>.
 
 
 job_data(Db, Mrst) ->
diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl
index b5e081a..e4dcdce 100644
--- a/src/couch_views/test/couch_views_cleanup_test.erl
+++ b/src/couch_views/test/couch_views_cleanup_test.erl
@@ -408,4 +408,4 @@ job_id(Db, DDoc) ->
     DbName = fabric2_db:name(Db),
     {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
     HexSig = fabric2_util:to_hex(Sig),
-    <<DbName/binary, "-", HexSig/binary>>.
+    <<HexSig/binary, "-", DbName/binary>>.