You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2023/05/06 14:05:45 UTC

[couchdb] branch remove-md5-more created (now 4aef4ebaf)

This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a change to branch remove-md5-more
in repository https://gitbox.apache.org/repos/asf/couchdb.git


      at 4aef4ebaf Merge remote-tracking branch 'origin/remove-content-md5-header' into remove-md5-entirely

This branch includes the following new commits:

     new 3cd5a17a6 s/digest/checksum
     new 2663bdc6b consolidate checksum verification
     new f16953b26 make legacy checksums a compile-time option
     new 90ca838cd s/old/legacy
     new 4aef4ebaf Merge remote-tracking branch 'origin/remove-content-md5-header' into remove-md5-entirely

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 05/05: Merge remote-tracking branch 'origin/remove-content-md5-header' into remove-md5-entirely

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch remove-md5-more
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 4aef4ebaf080a4f5f9a8dd64751a63c9d6e5300d
Merge: 90ca838cd 141eb3d19
Author: Robert Newson <rn...@apache.org>
AuthorDate: Sat May 6 14:40:22 2023 +0100

    Merge remote-tracking branch 'origin/remove-content-md5-header' into remove-md5-entirely

 src/chttpd/src/chttpd_db.erl                       |  34 +-----
 src/couch/src/couch_att.erl                        |  10 +-
 src/couch/src/couch_db.erl                         |  23 +---
 src/couch/src/couch_httpd_db.erl                   |  31 +-----
 src/couch/test/eunit/couchdb_attachments_tests.erl | 122 +--------------------
 test/elixir/test/attachments_test.exs              |  23 ----
 6 files changed, 7 insertions(+), 236 deletions(-)


[couchdb] 02/05: consolidate checksum verification

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch remove-md5-more
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2663bdc6bf3d7c512d6296026c11ef9d9f3c9465
Author: Robert Newson <rn...@apache.org>
AuthorDate: Sat May 6 14:28:26 2023 +0100

    consolidate checksum verification
---
 src/couch/src/couch_file.erl | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index f52417666..315f3a795 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -676,13 +676,7 @@ load_header(Fd, Pos, HeaderLen, RestBlock) ->
         end,
     <<Checksum:16/binary, HeaderBin/binary>> =
         iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)),
-    case exxhash:xxhash128(HeaderBin) of
-        Checksum ->
-            ok;
-        <<_/binary>> ->
-            couch_stats:increment_counter([couch_file, old_checksums]),
-            Checksum = couch_hash:md5_hash(HeaderBin)
-    end,
+    true = verify_checksum(HeaderBin, Checksum),
     {ok, HeaderBin}.
 
 %% Read multiple block locations using a single file:pread/2.
@@ -861,16 +855,25 @@ monitored_by_pids() ->
 verify_checksum(_Fd, _Pos, IoList, <<>>) ->
     IoList;
 verify_checksum(Fd, Pos, IoList, Checksum) ->
-    case exxhash:xxhash128(iolist_to_binary(IoList)) of
-        Checksum ->
+    case verify_checksum(IoList, Checksum) of
+        true ->
             IoList;
-        <<_/binary>> ->
-            case couch_hash:md5_hash(IoList) of
-                Checksum ->
+        false ->
+            report_checksum_error(Fd, Pos)
+    end.
+
+verify_checksum(Data, ExpectedChecksum) ->
+    Bin = iolist_to_binary(Data),
+    case ExpectedChecksum == exxhash:xxhash128(Bin) of
+        true ->
+            true;
+        false ->
+            case ExpectedChecksum == couch_hash:md5_hash(Data) of
+                true ->
                     couch_stats:increment_counter([couch_file, old_checksums]),
-                    IoList;
-                _ ->
-                    report_checksum_error(Fd, Pos)
+                    true;
+                false ->
+                    false
             end
     end.
 


[couchdb] 01/05: s/digest/checksum

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch remove-md5-more
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 3cd5a17a603ee9ecdbaf20c055f6d4adccd92b47
Author: Robert Newson <rn...@apache.org>
AuthorDate: Sat May 6 14:10:16 2023 +0100

    s/digest/checksum
---
 src/couch/priv/stats_descriptions.cfg |  4 +--
 src/couch/src/couch_file.erl          | 64 +++++++++++++++++------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index 2dae98954..a7ef4f928 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -298,9 +298,9 @@
     {type, counter},
     {desc, <<"number of the attempts to read beyond set limit">>}
 ]}.
-{[couch_file, old_digests], [
+{[couch_file, old_checksums], [
     {type, counter},
-    {desc, <<"number of old digests found in couch_file instances">>}
+    {desc, <<"number of old checksums found in couch_file instances">>}
 ]}.
 {[mango, unindexed_queries], [
     {type, counter},
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index afa848efa..f52417666 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -142,8 +142,8 @@ assemble_file_chunk(Bin) ->
     [<<0:1/integer, (iolist_size(Bin)):31/integer>>, Bin].
 
 assemble_file_chunk_and_checksum(Bin) ->
-    Digest = exxhash:xxhash128(Bin),
-    [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Digest, Bin].
+    Checksum = exxhash:xxhash128(Bin),
+    [<<1:1/integer, (iolist_size(Bin)):31/integer>>, Checksum, Bin].
 
 %%----------------------------------------------------------------------
 %% Purpose: Reads a term from a file that was written with append_term
@@ -169,8 +169,8 @@ pread_binary(Fd, Pos) ->
 
 pread_iolist(Fd, Pos) ->
     case ioq:call(Fd, {pread_iolist, Pos}, erlang:get(io_priority)) of
-        {ok, IoList, Digest} ->
-            {ok, verify_digest(Fd, Pos, IoList, Digest)};
+        {ok, IoList, Checksum} ->
+            {ok, verify_checksum(Fd, Pos, IoList, Checksum)};
         Error ->
             Error
     end.
@@ -191,13 +191,13 @@ pread_binaries(Fd, PosList) ->
 
 pread_iolists(Fd, PosList) ->
     case ioq:call(Fd, {pread_iolists, PosList}, erlang:get(io_priority)) of
-        {ok, DataAndDigests} ->
+        {ok, DataAndChecksums} ->
             Data = lists:zipwith(
-                fun(Pos, {IoList, Digest}) ->
-                    verify_digest(Fd, Pos, IoList, Digest)
+                fun(Pos, {IoList, Checksum}) ->
+                    verify_checksum(Fd, Pos, IoList, Checksum)
                 end,
                 PosList,
-                DataAndDigests
+                DataAndChecksums
             ),
             {ok, Data};
         Error ->
@@ -400,9 +400,9 @@ read_header(Fd) ->
 
 write_header(Fd, Data) ->
     Bin = term_to_binary(Data),
-    Digest = exxhash:xxhash128(Bin),
+    Checksum = exxhash:xxhash128(Bin),
     % now we assemble the final header binary and write to disk
-    FinalBin = <<Digest/binary, Bin/binary>>,
+    FinalBin = <<Checksum/binary, Bin/binary>>,
     ioq:call(Fd, {write_header, FinalBin}, erlang:get(io_priority)).
 
 init_status_error(ReturnPid, Ref, Error) ->
@@ -504,11 +504,11 @@ handle_call({pread_iolist, Pos}, _From, File) ->
     update_read_timestamp(),
     {LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4),
     case iolist_to_binary(LenIolist) of
-        % an digest-prefixed term
+        % a checksum-prefixed term
         <<1:1/integer, Len:31/integer>> ->
-            {DigestAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
-            {Digest, IoList} = extract_digest(DigestAndIoList),
-            {reply, {ok, IoList, Digest}, File};
+            {ChecksumAndIoList, _} = read_raw_iolist_int(File, NextPos, Len + 16),
+            {Checksum, IoList} = extract_checksum(ChecksumAndIoList),
+            {reply, {ok, IoList, Checksum}, File};
         <<0:1/integer, Len:31/integer>> ->
             {Iolist, _} = read_raw_iolist_int(File, NextPos, Len),
             {reply, {ok, Iolist, <<>>}, File}
@@ -520,7 +520,7 @@ handle_call({pread_iolists, PosL}, _From, File) ->
     LocNums2 = lists:map(
         fun({LenIoList, NextPos}) ->
             case iolist_to_binary(LenIoList) of
-                % a digest-prefixed term
+                % a checksum-prefixed term
                 <<1:1/integer, Len:31/integer>> ->
                     {NextPos, Len + 16};
                 <<0:1/integer, Len:31/integer>> ->
@@ -534,8 +534,8 @@ handle_call({pread_iolists, PosL}, _From, File) ->
         fun({LenIoList, _}, {IoList, _}) ->
             case iolist_to_binary(LenIoList) of
                 <<1:1/integer, _:31/integer>> ->
-                    {Digest, IoList} = extract_digest(IoList),
-                    {IoList, Digest};
+                    {Checksum, IoList} = extract_checksum(IoList),
+                    {IoList, Checksum};
                 <<0:1/integer, _:31/integer>> ->
                     {IoList, <<>>}
             end
@@ -674,14 +674,14 @@ load_header(Fd, Pos, HeaderLen, RestBlock) ->
                 {ok, Missing} = file:pread(Fd, ReadStart, ReadLen),
                 <<RestBlock/binary, Missing/binary>>
         end,
-    <<Digest:16/binary, HeaderBin/binary>> =
+    <<Checksum:16/binary, HeaderBin/binary>> =
         iolist_to_binary(remove_block_prefixes(?PREFIX_SIZE, RawBin)),
     case exxhash:xxhash128(HeaderBin) of
-        Digest ->
+        Checksum ->
             ok;
         <<_/binary>> ->
-            couch_stats:increment_counter([couch_file, old_digests]),
-            Digest = couch_hash:md5_hash(HeaderBin)
+            couch_stats:increment_counter([couch_file, old_checksums]),
+            Checksum = couch_hash:md5_hash(HeaderBin)
     end,
     {ok, HeaderBin}.
 
@@ -785,10 +785,10 @@ get_pread_locnum(File, Pos, Len) ->
             {Pos, TotalBytes}
     end.
 
--spec extract_digest(iolist()) -> {binary(), iolist()}.
-extract_digest(FullIoList) ->
-    {DigestList, IoList} = split_iolist(FullIoList, 16, []),
-    {iolist_to_binary(DigestList), IoList}.
+-spec extract_checksum(iolist()) -> {binary(), iolist()}.
+extract_checksum(FullIoList) ->
+    {ChecksumList, IoList} = split_iolist(FullIoList, 16, []),
+    {iolist_to_binary(ChecksumList), IoList}.
 
 calculate_total_read_len(0, FinalLen) ->
     calculate_total_read_len(1, FinalLen) + 1;
@@ -858,23 +858,23 @@ monitored_by_pids() ->
     {monitored_by, PidsAndRefs} = process_info(self(), monitored_by),
     lists:filter(fun is_pid/1, PidsAndRefs).
 
-verify_digest(_Fd, _Pos, IoList, <<>>) ->
+verify_checksum(_Fd, _Pos, IoList, <<>>) ->
     IoList;
-verify_digest(Fd, Pos, IoList, Digest) ->
+verify_checksum(Fd, Pos, IoList, Checksum) ->
     case exxhash:xxhash128(iolist_to_binary(IoList)) of
-        Digest ->
+        Checksum ->
             IoList;
         <<_/binary>> ->
             case couch_hash:md5_hash(IoList) of
-                Digest ->
-                    couch_stats:increment_counter([couch_file, old_digests]),
+                Checksum ->
+                    couch_stats:increment_counter([couch_file, old_checksums]),
                     IoList;
                 _ ->
-                    report_digest_error(Fd, Pos)
+                    report_checksum_error(Fd, Pos)
             end
     end.
 
-report_digest_error(Fd, Pos) ->
+report_checksum_error(Fd, Pos) ->
     couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]),
     exit({file_corruption, <<"file corruption">>}).
 


[couchdb] 03/05: make legacy checksums a compile-time option

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch remove-md5-more
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit f16953b26414f915b0d400f62cde1dfdd6f3c50c
Author: Robert Newson <rn...@apache.org>
AuthorDate: Sat May 6 14:36:38 2023 +0100

    make legacy checksums a compile-time option
---
 src/couch/src/couch_file.erl | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 315f3a795..0a9aaf49c 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -864,11 +864,11 @@ verify_checksum(Fd, Pos, IoList, Checksum) ->
 
 verify_checksum(Data, ExpectedChecksum) ->
     Bin = iolist_to_binary(Data),
-    case ExpectedChecksum == exxhash:xxhash128(Bin) of
+    case ExpectedChecksum == checksum(Bin) of
         true ->
             true;
         false ->
-            case ExpectedChecksum == couch_hash:md5_hash(Data) of
+            case ExpectedChecksum == legacy_checksum(Data) of
                 true ->
                     couch_stats:increment_counter([couch_file, old_checksums]),
                     true;
@@ -877,6 +877,18 @@ verify_checksum(Data, ExpectedChecksum) ->
             end
     end.
 
+checksum(Bin) when is_binary(Bin) ->
+    exxhash:xxhash128(Bin).
+
+%% Legacy (MD5) checksum of iodata; verify_checksum/2 passes its raw Data.
+-ifdef(NO_MD5).
+legacy_checksum(_Data) ->
+    no_legacy_checksum. % an atom never equals a stored binary checksum
+-else.
+legacy_checksum(Data) ->
+    couch_hash:md5_hash(Data).
+-endif.
+
 report_checksum_error(Fd, Pos) ->
     couch_log:emergency("File corruption in ~p at position ~B", [Fd, Pos]),
     exit({file_corruption, <<"file corruption">>}).


[couchdb] 04/05: s/old/legacy

Posted by rn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rnewson pushed a commit to branch remove-md5-more
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 90ca838cd3f3e2f03b2ee4d433d700a1459b0a84
Author: Robert Newson <rn...@apache.org>
AuthorDate: Sat May 6 14:37:43 2023 +0100

    s/old/legacy
---
 src/couch/priv/stats_descriptions.cfg | 4 ++--
 src/couch/src/couch_file.erl          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg
index a7ef4f928..46f08d724 100644
--- a/src/couch/priv/stats_descriptions.cfg
+++ b/src/couch/priv/stats_descriptions.cfg
@@ -298,9 +298,9 @@
     {type, counter},
     {desc, <<"number of the attempts to read beyond set limit">>}
 ]}.
-{[couch_file, old_checksums], [
+{[couch_file, legacy_checksums], [
     {type, counter},
-    {desc, <<"number of old checksums found in couch_file instances">>}
+    {desc, <<"number of legacy checksums found in couch_file instances">>}
 ]}.
 {[mango, unindexed_queries], [
     {type, counter},
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index 0a9aaf49c..2f384541d 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -870,7 +870,7 @@ verify_checksum(Data, ExpectedChecksum) ->
         false ->
             case ExpectedChecksum == legacy_checksum(Data) of
                 true ->
-                    couch_stats:increment_counter([couch_file, old_checksums]),
+                    couch_stats:increment_counter([couch_file, legacy_checksums]),
                     true;
                 false ->
                     false