You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2020/06/02 21:35:30 UTC

[couchdb] 01/01: Improve efficiency of couch_jobs:accept/2 for views

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch make-couch-view-job-accepts-faster
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit c5397dea1ae8a0c35aa7945b888a6bf9f105dc70
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Tue Jun 2 17:20:19 2020 -0400

    Improve efficiency of couch_jobs:accept/2 for views
    
    Use the `no_schedule` option to speed up job dequeuing. This optimization
    allows dequeuing jobs more efficiently if these conditions are met:
    
     1) Job IDs start with a random prefix
     2) No time-based scheduling is used
    
    Both of those can be true for views, since job IDs can be generated such that
    the signature comes before the db name part, which is what this commit does.
    
    The way the optimization works is that a random ID is generated in the pending
    jobs range, then a key selection is used to pick either the job before or after
    it. That reduces each dequeue attempt to just 1 read instead of reading up to
    1000 jobs.
---
 src/couch_views/src/couch_views_indexer.erl       | 3 ++-
 src/couch_views/src/couch_views_jobs.erl          | 4 +++-
 src/couch_views/test/couch_views_cleanup_test.erl | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index bd1bd4d..4d09fdb 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -44,7 +44,8 @@ spawn_link() ->
 
 
 init() ->
-    {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, #{}),
+    Opts = #{no_schedule => true},
+    {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, Opts),
     Data = upgrade_data(Data0),
     #{
         <<"db_name">> := DbName,
diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl
index a9ca168..4b0aa26 100644
--- a/src/couch_views/src/couch_views_jobs.erl
+++ b/src/couch_views/src/couch_views_jobs.erl
@@ -134,7 +134,9 @@ job_id(#{name := DbName}, #mrst{sig = Sig}) ->
 
 job_id(DbName, Sig) ->
     HexSig = fabric2_util:to_hex(Sig),
-    <<DbName/binary, "-", HexSig/binary>>.
+    % Put signature first in order to be able to use the no_schedule
+    % couch_jobs:accept/2 option
+    <<HexSig/binary, "-", DbName/binary>>.
 
 
 job_data(Db, Mrst) ->
diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl
index b5e081a..e4dcdce 100644
--- a/src/couch_views/test/couch_views_cleanup_test.erl
+++ b/src/couch_views/test/couch_views_cleanup_test.erl
@@ -408,4 +408,4 @@ job_id(Db, DDoc) ->
     DbName = fabric2_db:name(Db),
     {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc),
     HexSig = fabric2_util:to_hex(Sig),
-    <<DbName/binary, "-", HexSig/binary>>.
+    <<HexSig/binary, "-", DbName/binary>>.