You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2022/01/11 17:59:24 UTC

[couchdb] branch collation-view-versioning created (now 80c225c)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch collation-view-versioning
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at 80c225c  Handle libicu upgrades in views

This branch includes the following new commits:

     new 80c225c  Handle libicu upgrades in views

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[couchdb] 01/01: Handle libicu upgrades in views

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch collation-view-versioning
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 80c225cdf8a79fc7597d2cb43f17e1f3a4092b4f
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Tue Jan 11 12:53:10 2022 -0500

    Handle libicu upgrades in views
    
    Previously, libicu collator versions were not tracked, and during major version
    upgrades it would have been possible to experience data loss due to collation
    order changes.
    
    To fix it, introduced a new view info map header field which records the list
    of UCA (Unicode Collation Algorithm) major versions seen. The list is updated
    every time a view is opened from disk, so if it is accessed with a newer version,
    the list would contain 2 or more versions and in that case smoosh would enqueue
    it for recompaction.
    
    Recompaction should re-write all the keys and values using the current libicu
    library version. Since we don't use sort keys, it should fix the view sort
    order.
    
    As a bonus, we are able to reuse a currently ignored header field, which is
    transparently ignored during downgrades back to the previous 3.2.1 version.
    
    Moreover, the newly created view info map may be re-used in the future to
    record other metadata about the view without the need to expand the view record,
    while still allowing for easy downgrades.
---
 src/couch_mrview/include/couch_mrview.hrl   |  4 ++-
 src/couch_mrview/src/couch_mrview_index.erl |  8 +++--
 src/couch_mrview/src/couch_mrview_util.erl  | 54 +++++++++++++++++++++++------
 src/smoosh/src/smoosh_server.erl            | 11 ++++++
 4 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/src/couch_mrview/include/couch_mrview.hrl b/src/couch_mrview/include/couch_mrview.hrl
index bb0ab0b..3b3784a 100644
--- a/src/couch_mrview/include/couch_mrview.hrl
+++ b/src/couch_mrview/include/couch_mrview.hrl
@@ -29,7 +29,8 @@
     doc_acc,
     doc_queue,
     write_queue,
-    qserver=nil
+    qserver=nil,
+    view_info=#{}
 }).
 
 
@@ -49,6 +50,7 @@
     seq=0,
     purge_seq=0,
     id_btree_state=nil,
+    view_info=#{}, % replace log btree in versions < 3.x
     view_states=nil
 }).
 
diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl
index a024d35..6113ee0 100644
--- a/src/couch_mrview/src/couch_mrview_index.erl
+++ b/src/couch_mrview/src/couch_mrview_index.erl
@@ -63,7 +63,8 @@ get(info, State) ->
         language = Lang,
         update_seq = UpdateSeq,
         purge_seq = PurgeSeq,
-        views = Views
+        views = Views,
+        view_info = ViewInfo
     } = State,
     {ok, FileSize} = couch_file:bytes(Fd),
     {ok, ExternalSize} = couch_mrview_util:calculate_external_size(Views),
@@ -72,7 +73,7 @@ get(info, State) ->
 
     UpdateOptions0 = get(update_options, State),
     UpdateOptions = [atom_to_binary(O, latin1) || O <- UpdateOptions0],
-
+    CollatorVersions = couch_mrview_util:get_collator_versions(ViewInfo),
     {ok, [
         {signature, list_to_binary(couch_index_util:hexsig(Sig))},
         {language, Lang},
@@ -84,7 +85,8 @@ get(info, State) ->
             ]}},
         {update_seq, UpdateSeq},
         {purge_seq, PurgeSeq},
-        {update_options, UpdateOptions}
+        {update_options, UpdateOptions},
+        {collator_versions, CollatorVersions}
     ]};
 get(Other, _) ->
     throw({unknown_index_property, Other}).
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index b7220f7..8f9f219 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -32,6 +32,7 @@
 -export([get_view_keys/1, get_view_queries/1]).
 -export([set_view_type/3]).
 -export([set_extra/3, get_extra/2, get_extra/3]).
+-export([get_collator_versions/1]).
 
 -define(MOD, couch_mrview_index).
 -define(GET_VIEW_RETRY_COUNT, 1).
@@ -285,6 +286,7 @@ init_state(Db, Fd, #mrst{views = Views} = State, nil) ->
         seq = 0,
         purge_seq = PurgeSeq,
         id_btree_state = nil,
+        view_info = update_collator_versions(#{}),
         view_states = [make_view_state(#mrview{}) || _ <- Views]
     },
     init_state(Db, Fd, State, Header);
@@ -297,6 +299,7 @@ init_state(Db, Fd, State, Header) ->
         seq = Seq,
         purge_seq = PurgeSeq,
         id_btree_state = IdBtreeState,
+        view_info = ViewInfo,
         view_states = ViewStates
     } = maybe_update_header(Header),
 
@@ -314,7 +317,8 @@ init_state(Db, Fd, State, Header) ->
         update_seq = Seq,
         purge_seq = PurgeSeq,
         id_btree = IdBtree,
-        views = Views2
+        views = Views2,
+        view_info = ViewInfo
     }.
 
 open_view(_Db, Fd, Lang, ViewState, View) ->
@@ -764,13 +768,15 @@ make_header(State) ->
         update_seq = Seq,
         purge_seq = PurgeSeq,
         id_btree = IdBtree,
-        views = Views
+        views = Views,
+        view_info = ViewInfo
     } = State,
 
     #mrheader{
         seq = Seq,
         purge_seq = PurgeSeq,
         id_btree_state = get_btree_state(IdBtree),
+        view_info = ViewInfo,
         view_states = [make_view_state(V) || V <- Views]
     }.
 
@@ -819,7 +825,8 @@ reset_state(State) ->
         qserver = nil,
         update_seq = 0,
         id_btree = nil,
-        views = [View#mrview{btree = nil} || View <- State#mrst.views]
+        views = [View#mrview{btree = nil} || View <- State#mrst.views],
+        view_info = #{}
     }.
 
 all_docs_key_opts(#mrargs{extra = Extra} = Args) ->
@@ -1070,16 +1077,33 @@ old_view_format(View, SI, KSI) ->
         View#mrview.options
     }.
 
-maybe_update_header(#mrheader{} = Header) ->
-    Header;
-maybe_update_header(Header) when tuple_size(Header) == 6 ->
+maybe_update_header(#mrheader{view_info = Info} = Header) when is_map(Info) ->
+    % Latest (3.2.1+) version. Note: the size of the record is the same as the
+    % legacy <2.3.1 version. The main difference is that the LogBt field is now
+    % a map. This trick also allows for easy downgrading back to version 3.2.1
+    % and then upgrading again to a later version if needed.
+    Header#mrheader{view_info = update_collator_versions(Info)};
+maybe_update_header({mrheader, Seq, PSeq, IDBt, ViewStates}) ->
+    % Versions >2.3.1 and =<3.2.1 (no view info map)
+    #mrheader{
+        seq = Seq,
+        purge_seq = PSeq,
+        id_btree_state = IDBt,
+        view_info = update_collator_versions(#{}),
+        view_states = [make_view_state(S) || S <- ViewStates]
+    };
+maybe_update_header({mrheader, Seq, PSeq, IDBt, LogBt, ViewStates})
+        when not is_map(LogBt) ->
+    % Versions <2.3.1.
     #mrheader{
-        seq = element(2, Header),
-        purge_seq = element(3, Header),
-        id_btree_state = element(4, Header),
-        view_states = [make_view_state(S) || S <- element(6, Header)]
+        seq = Seq,
+        purge_seq = PSeq,
+        id_btree_state = IDBt,
+        view_info = update_collator_versions(#{}),
+        view_states = [make_view_state(S) || S <- ViewStates]
     }.
 
+
 %% End of <= 2.x upgrade code.
 
 make_view_state(#mrview{} = View) ->
@@ -1216,3 +1240,13 @@ kv_external_size(KVList, Reduction) ->
         ?term_size(Reduction),
         KVList
     ).
+
+%% Record the current UCA major version in uca_vers; `=>` creates the key when absent.
+update_collator_versions(#{} = ViewInfo) ->
+    Ver = element(1, couch_ejson_compare:get_uca_version()),
+    ViewInfo#{uca_vers => lists:usort([Ver | maps:get(uca_vers, ViewInfo, [])])}.
+
+%% Collator versions stored under uca_vers, or [] if absent or not a list.
+get_collator_versions(#{} = ViewInfo) ->
+    Vers = maps:get(uca_vers, ViewInfo, []),
+    case is_list(Vers) of true -> Vers; false -> [] end.
diff --git a/src/smoosh/src/smoosh_server.erl b/src/smoosh/src/smoosh_server.erl
index 0526625..b5c2b56 100644
--- a/src/smoosh/src/smoosh_server.erl
+++ b/src/smoosh/src/smoosh_server.erl
@@ -480,6 +480,9 @@ get_priority(Channel) ->
     smoosh_utils:get(Channel, "priority", "ratio").
 
 needs_upgrade(Props) ->
+    db_needs_upgrade(Props) orelse view_needs_upgrade(Props).
+
+db_needs_upgrade(Props) ->
     DiskVersion = couch_util:get_value(disk_format_version, Props),
     case couch_util:get_value(engine, Props) of
         couch_bt_engine ->
@@ -488,6 +491,14 @@ needs_upgrade(Props) ->
             false
     end.
 
+%% True when Props lists two or more collator versions under the
+%% collator_versions key; false when that property is absent.
+view_needs_upgrade(Props) ->
+    case couch_util:get_value(collator_versions, Props) of
+        undefined -> false;
+        Vers when is_list(Vers) -> length(Vers) >= 2
+    end.
+
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").