Posted to commits@couchdb.apache.org by rn...@apache.org on 2014/08/26 11:45:56 UTC

[01/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Repository: couchdb-couch
Updated Branches:
  refs/heads/windsor-merge 2082f5845 -> 14dc5e93d (forced update)


Move attachment code into couch_att

This moves the majority of the attachment representation
into the couch_att module. This serves to isolate the
current record, allowing easier in-place upgrades, and
provides a place to start collecting common
attachment-related functionality.

The upgrades are handled lazily, which allows rolling back
to older code as long as the new attachment format has not
yet been required via storage of any of the new extended
attributes supported by the fetch/store APIs. One caveat is
that the extended attributes are not enforced by couch_att
at this time, so it would be quite easy to store garbage.
As the extent of attachment concerns becomes more stable, a
set of more permanent fetch_[field]/store_[field] functions
may be added to help enforce both field types and common
field names and defaults (all fields default to undefined
except for those defaults present in the original record
definition, which carry over automatically).

Finally, while this patch does move a lot of code to
couch_att, it hasn't refined the interfaces much. Those
changes will follow in later patches to improve and
simplify the organization of attachment code. This
includes the addition of more unit tests, which currently
cover only some portions of the attachment functionality
related to upgrades and field fetching & storage.
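
For reviewers, a minimal sketch of how the new field API is meant to
be used (the attachment values here are illustrative only, not taken
from the patch):

    %% Sketch only: couch_att:new/1, fetch/2, store/3, transform/3.
    Att0 = couch_att:new([{name, <<"logo.png">>}, {type, <<"image/png">>}]),
    <<"logo.png">> = couch_att:fetch(name, Att0),
    [<<"logo.png">>, <<"image/png">>] = couch_att:fetch([name, type], Att0),
    Att1 = couch_att:store(revpos, 2, Att0),
    Att2 = couch_att:transform(revpos, fun(N) -> N + 1 end, Att1),
    3 = couch_att:fetch(revpos, Att2).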


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/f3e45383
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/f3e45383
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/f3e45383

Branch: refs/heads/windsor-merge
Commit: f3e453831b7ce428c360c5a171301f10453c887c
Parents: af4c2aa
Author: Brian Mitchell <br...@p2p.io>
Authored: Wed Dec 11 23:07:50 2013 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:27 2014 +0100

----------------------------------------------------------------------
 include/couch_db.hrl     |  19 +-
 src/couch_att.erl        | 806 ++++++++++++++++++++++++++++++++++++++++++
 src/couch_db.erl         | 270 +++++---------
 src/couch_db_updater.erl |   2 +-
 src/couch_doc.erl        | 285 +++------------
 src/couch_httpd_db.erl   | 138 ++++----
 6 files changed, 1019 insertions(+), 501 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f3e45383/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 570d3db..8bb44a0 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -89,7 +89,7 @@
     % the json body object.
     body = {[]},
 
-    atts = [], % attachments
+    atts = [] :: [couch_att:att()], % attachments
 
     deleted = false,
 
@@ -99,23 +99,6 @@
 }).
 
 
--record(att, {
-    name,
-    type,
-    att_len,
-    disk_len, % length of the attachment in its identity form
-              % (that is, without a content encoding applied to it)
-              % differs from att_len when encoding /= identity
-    md5= <<>>,
-    revpos=0,
-    data,
-    encoding=identity % currently supported values are:
-                      %     identity, gzip
-                      % additional values to support in the future:
-                      %     deflate, compress
-}).
-
-
 -record(user_ctx, {
     name=null,
     roles=[],

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f3e45383/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
new file mode 100644
index 0000000..e011402
--- /dev/null
+++ b/src/couch_att.erl
@@ -0,0 +1,806 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_att).
+
+-export([
+    new/0,
+    new/1,
+    fetch/2,
+    store/2,
+    store/3,
+    transform/3
+]).
+
+-export([
+    is_stub/1,
+    merge_stubs/2
+]).
+
+-export([
+    disk_info/2,
+    to_disk_term/1,
+    from_disk_term/2
+]).
+
+-export([
+    from_json/2,
+    to_json/4
+]).
+
+-export([
+    flush/2,
+    foldl/3,
+    range_foldl/5,
+    foldl_decode/3,
+    to_binary/1
+]).
+
+-export([
+    upgrade/1,
+    downgrade/1
+]).
+
+-export_type([att/0]).
+
+-include_lib("couch/include/couch_db.hrl").
+
+
+%% Legacy attachment record. This is going to be phased out by the new proplist
+%% based structure. It's needed for now to allow code to perform lazy upgrades
+%% while the patch is rolled out to the cluster. Attachments passed as records
+%% will remain so until they are required to be represented as property lists.
+%% Once this has been widely deployed, this record will be removed entirely and
+%% property lists will be the main format.
+-record(att, {
+    name :: binary(),
+    type :: binary(),
+    att_len :: non_neg_integer(),
+
+    %% length of the attachment in its identity form
+    %% (that is, without a content encoding applied to it)
+    %% differs from att_len when encoding /= identity
+    disk_len :: non_neg_integer(),
+
+    md5 = <<>> :: binary(),
+    revpos = 0 :: non_neg_integer(),
+    data :: stub | follows | binary() | {any(), any()} |
+            {follows, pid(), reference()} | fun(() -> binary()),
+
+    %% Encoding of the attachment
+    %% currently supported values are:
+    %%     identity, gzip
+    %% additional values to support in the future:
+    %%     deflate, compress
+    encoding = identity :: identity | gzip
+}).
+
+
+%% Extensible Attachment Type
+%%
+%% The following types describe the known properties for attachment fields
+%% encoded as property lists to allow easier upgrades. Values not in this list
+%% should be accepted at runtime but treated as opaque data, since they might
+%% be used by upgraded code. If you plan on operating on new data, please add
+%% an entry here as documentation.
+
+
+%% The name of the attachment is also used as the mime-part name for file
+%% downloads. These must be unique per document.
+-type name_prop() :: {name, binary()}.
+
+
+%% The mime type of the attachment. This does affect compression of certain
+%% attachments if the type is found to be configured as a compressible type.
+%% This is commonly reserved for text/* types but could include other custom
+%% cases as well. See definition and use of couch_util:compressible_att_type/1.
+-type type_prop() :: {type, binary()}.
+
+
+%% The length of the attachment data as stored, including any content
+%% encoding that has been applied (differs from disk_len when not identity).
+-type att_len_prop() :: {att_len, non_neg_integer()}.
+
+
+%% The size of the attachment in its identity form, without content encoding.
+-type disk_len_prop() :: {disk_len, non_neg_integer()}.
+
+
+%% This is a digest of the original attachment data as uploaded by the client.
+%% It's useful for checking validity of contents against other attachment data
+%% as well as quick digest computation of the enclosing document.
+-type md5_prop() :: {md5, binary()}.
+
+
+-type revpos_prop() :: {revpos, non_neg_integer()}.
+
+
+%% This field is currently overloaded with just about everything. The
+%% {any(), any()} type is just there until I have time to check the actual
+%% values expected. Over time this should be split into more than one property
+%% to allow simpler handling.
+-type data_prop() :: {
+    data, stub | follows | binary() | {any(), any()} |
+    {follows, pid(), reference()} | fun(() -> binary())
+}.
+
+
+%% We will occasionally compress our data. See type_prop() for more information
+%% on when this happens.
+-type encoding_prop() :: {encoding, identity | gzip}.
+
+
+-type attachment() :: [
+    name_prop() | type_prop() |
+    att_len_prop() | disk_len_prop() |
+    md5_prop() | revpos_prop() |
+    data_prop() | encoding_prop()
+].
+
+
+-opaque att() :: #att{} | attachment().
+
+
+new() ->
+    %% We construct a record by default for compatibility. This will be
+    %% upgraded on demand. A subtle effect this has on all attachments
+    %% constructed via new is that it will pick up the proper defaults
+    %% from the #att record definition given above. Newer properties do
+    %% not support special default values and will all be treated as
+    %% undefined.
+    #att{}.
+
+
+-spec new([{atom(), any()}]) -> att().
+new(Props) ->
+    store(Props, new()).
+
+
+-spec fetch([atom()], att()) -> [any()];
+           (atom(), att()) -> any().
+fetch(Fields, Att) when is_list(Fields) ->
+    [fetch(Field, Att) || Field <- Fields];
+fetch(Field, Att) when is_list(Att) ->
+    case lists:keyfind(Field, 1, Att) of
+        {Field, Value} -> Value;
+        false -> undefined
+    end;
+fetch(name, #att{name = Name}) ->
+    Name;
+fetch(type, #att{type = Type}) ->
+    Type;
+fetch(att_len, #att{att_len = AttLen}) ->
+    AttLen;
+fetch(disk_len, #att{disk_len = DiskLen}) ->
+    DiskLen;
+fetch(md5, #att{md5 = Digest}) ->
+    Digest;
+fetch(revpos, #att{revpos = RevPos}) ->
+    RevPos;
+fetch(data, #att{data = Data}) ->
+    Data;
+fetch(encoding, #att{encoding = Encoding}) ->
+    Encoding;
+fetch(_, _) ->
+    undefined.
+
+
+-spec store([{atom(), any()}], att()) -> att().
+store(Props, Att0) ->
+    lists:foldl(fun({Field, Value}, Att) ->
+        store(Field, Value, Att)
+    end, Att0, Props).
+
+
+-spec store(atom(), any(), att()) -> att().
+store(Field, undefined, Att) when is_list(Att) ->
+    lists:keydelete(Field, 1, Att);
+store(Field, Value, Att) when is_list(Att) ->
+    lists:keystore(Field, 1, Att, {Field, Value});
+store(name, Name, Att) ->
+    Att#att{name = Name};
+store(type, Type, Att) ->
+    Att#att{type = Type};
+store(att_len, AttLen, Att) ->
+    Att#att{att_len = AttLen};
+store(disk_len, DiskLen, Att) ->
+    Att#att{disk_len = DiskLen};
+store(md5, Digest, Att) ->
+    Att#att{md5 = Digest};
+store(revpos, RevPos, Att) ->
+    Att#att{revpos = RevPos};
+store(data, Data, Att) ->
+    Att#att{data = Data};
+store(encoding, Encoding, Att) ->
+    Att#att{encoding = Encoding};
+store(Field, Value, Att) ->
+    store(Field, Value, upgrade(Att)).
+
+
+-spec transform(atom(), fun(), att()) -> att().
+transform(Field, Fun, Att) ->
+    NewValue = Fun(fetch(Field, Att)),
+    store(Field, NewValue, Att).
+
+
+is_stub(Att) ->
+    stub == fetch(data, Att).
+
+
+%% merge_stubs takes all stub attachments and replaces them with on-disk
+%% attachments. It will return {missing, Name} if a stub isn't matched with
+%% an existing attachment on disk. If a revpos is supplied with the stub,
+%% it is only counted as a match if it equals the disk attachment's revpos.
+merge_stubs(MemAtts, DiskAtts) ->
+    OnDisk = dict:from_list(
+        [{fetch(name, Att), Att} || Att <- DiskAtts]
+    ),
+    merge_stubs(MemAtts, OnDisk, []).
+
+
+-spec merge_stubs([att()], dict:dict(), [att()]) -> [att()].
+merge_stubs([Att | Rest], OnDisk, Merged) ->
+    case fetch(data, Att) of
+        stub ->
+            [Name, Pos] = fetch([name, revpos], Att),
+            case dict:find(Name, OnDisk) of
+                {ok, DiskAtt} ->
+                    RevPos = fetch(revpos, DiskAtt),
+                    if
+                        %% We want to check for consistency between the stub and
+                        %% disk revpos here. If the stub's revpos is undefined
+                        %% it means it wasn't provided by the user and does not
+                        %% require being matched.
+                        RevPos == Pos orelse Pos == undefined ->
+                            merge_stubs(Rest, OnDisk, [DiskAtt | Merged]);
+                        true ->
+                            {missing, Name}
+                    end;
+                _ ->
+                    {missing, Name}
+            end;
+        _ ->
+            merge_stubs(Rest, OnDisk, [Att | Merged])
+    end;
+merge_stubs([], _, Merged) ->
+    {ok, lists:reverse(Merged)}.
+
+
+disk_info(_, []) ->
+    {ok, [], []};
+disk_info(ActiveFd, Atts) ->
+    {AttFd, _} = fetch(data, hd(Atts)),
+    if
+        AttFd == ActiveFd ->
+            Tuples = [to_disk_term(Att) || Att <- Atts],
+            Info = lists:map(fun(Att) ->
+                [{_, Pos}, AttLen] = fetch([data, att_len], Att),
+                {Pos, AttLen}
+            end, Atts),
+            {ok, Tuples, Info};
+        true ->
+            ?LOG_ERROR("MISMATCH: ~p ; ~p~n", [ActiveFd, Atts]),
+            file_mismatch
+    end.
+
+
+%% When converting an attachment to disk term format, attempt to stay with the
+%% old format when possible. This should help make the attachment lazy upgrade
+%% as safe as possible, avoiding the need for complicated disk versioning
+%% schemes.
+to_disk_term(#att{} = Att) ->
+    {_, StreamIndex} = fetch(data, Att),
+    {
+        fetch(name, Att),
+        fetch(type, Att),
+        StreamIndex,
+        fetch(att_len, Att),
+        fetch(disk_len, Att),
+        fetch(revpos, Att),
+        fetch(md5, Att),
+        fetch(encoding, Att)
+    };
+to_disk_term(Att) ->
+    BaseProps = [name, type, data, att_len, disk_len, revpos, md5, encoding],
+    {Extended, Base} = lists:foldl(
+        fun
+            (data, {Props, Values}) ->
+                case lists:keytake(data, 1, Props) of
+                    {value, {_, {_Fd, Sp}}, Other} -> {Other, [Sp | Values]};
+                    {value, {_, Value}, Other} -> {Other, [Value | Values]};
+                    false -> {Props, [undefined | Values]}
+                end;
+            (Key, {Props, Values}) ->
+                case lists:keytake(Key, 1, Props) of
+                    {value, {_, Value}, Other} -> {Other, [Value | Values]};
+                    false -> {Props, [undefined | Values]}
+                end
+        end,
+        {Att, []},
+        BaseProps
+    ),
+    {list_to_tuple(lists:reverse(Base)), Extended}.
+
+
+%% The new disk term format is a simple wrapper around the legacy format. Base
+%% properties will remain in a tuple while the new fields and possibly data from
+%% future extensions will be stored in a list of atom/value pairs. While this is
+%% slightly less efficient, future work should be able to make use of
+%% compression to remove these sorts of common bits (block level compression
+%% with something like a shared dictionary that is checkpointed every now and
+%% then).
+from_disk_term(Fd, {Base, Extended}) when is_tuple(Base), is_list(Extended) ->
+    store(Extended, from_disk_term(Fd, Base));
+from_disk_term(Fd, {Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) ->
+    #att{
+        name=Name,
+        type=Type,
+        att_len=AttLen,
+        disk_len=DiskLen,
+        md5=Md5,
+        revpos=RevPos,
+        data={Fd,Sp},
+        encoding=upgrade_encoding(Enc)
+    };
+from_disk_term(Fd, {Name,Type,Sp,AttLen,RevPos,Md5}) ->
+    #att{
+        name=Name,
+        type=Type,
+        att_len=AttLen,
+        disk_len=AttLen,
+        md5=Md5,
+        revpos=RevPos,
+        data={Fd,Sp}
+    };
+from_disk_term(Fd, {Name,{Type,Sp,AttLen}}) ->
+    #att{
+        name=Name,
+        type=Type,
+        att_len=AttLen,
+        disk_len=AttLen,
+        md5= <<>>,
+        revpos=0,
+        data={Fd,Sp}
+    }.
+
+
+%% from_json reads in embedded JSON attachments and creates usable attachment
+%% values. The attachment may be a stub, inline data, or a multipart follower.
+from_json(Name, Props) ->
+    Type = couch_util:get_value(
+        <<"content_type">>, Props, ?DEFAULT_ATTACHMENT_CONTENT_TYPE
+    ),
+    Att = new([{name, Name}, {type, Type}]),
+    IsStub = couch_util:get_value(<<"stub">>, Props),
+    Follows = couch_util:get_value(<<"follows">>, Props),
+    if
+        IsStub -> stub_from_json(Att, Props);
+        Follows -> follow_from_json(Att, Props);
+        true -> inline_from_json(Att, Props)
+    end.
+
+
+stub_from_json(Att, Props) ->
+    {DiskLen, EncodedLen, Encoding} = encoded_lengths_from_json(Props),
+    Digest = digest_from_json(Props),
+    %% We specifically want undefined rather than the default 0 here to skip
+    %% the revpos consistency check on stubs when it's not provided in the
+    %% json object. See merge_stubs/3 for the stub check.
+    RevPos = couch_util:get_value(<<"revpos">>, Props),
+    store([
+        {md5, Digest}, {revpos, RevPos}, {data, stub}, {disk_len, DiskLen},
+        {att_len, EncodedLen}, {encoding, Encoding}
+    ], Att).
+
+
+follow_from_json(Att, Props) ->
+    {DiskLen, EncodedLen, Encoding} = encoded_lengths_from_json(Props),
+    Digest = digest_from_json(Props),
+    store([
+        {md5, Digest}, {data, follows}, {disk_len, DiskLen},
+        {att_len, EncodedLen}, {encoding, Encoding}
+    ], Att).
+
+
+inline_from_json(Att, Props) ->
+    B64Data = couch_util:get_value(<<"data">>, Props),
+    Data = base64:decode(B64Data),
+    Length = size(Data),
+    store([{data, Data}, {disk_len, Length}, {att_len, Length}], Att).
+
+
+encoded_lengths_from_json(Props) ->
+    Len = couch_util:get_value(<<"length">>, Props),
+    case couch_util:get_value(<<"encoding">>, Props) of
+        undefined ->
+            Encoding = identity,
+            EncodedLen = Len;
+        EncodingValue ->
+            EncodedLen = couch_util:get_value(<<"encoded_length">>, Props, Len),
+            Encoding = list_to_existing_atom(binary_to_list(EncodingValue))
+    end,
+    {Len, EncodedLen, Encoding}.
+
+
+digest_from_json(Props) ->
+    case couch_util:get_value(<<"digest">>, Props) of
+        <<"md5-", EncodedMd5/binary>> -> base64:decode(EncodedMd5);
+        _ -> <<>>
+    end.
+
+
+to_json(Att, OutputData, DataToFollow, ShowEncoding) ->
+    [Name, Data, DiskLen, AttLen, Enc, Type, RevPos, Md5] = fetch(
+        [name, data, disk_len, att_len, encoding, type, revpos, md5], Att
+    ),
+    Props = [
+        {<<"content_type">>, Type},
+        {<<"revpos">>, RevPos}
+    ],
+    DigestProp = case base64:encode(Md5) of
+        <<>> -> [];
+        Digest -> [{<<"digest">>, <<"md5-", Digest/binary>>}]
+    end,
+    DataProps = if
+        not OutputData orelse Data == stub ->
+            [{<<"length">>, DiskLen}, {<<"stub">>, true}];
+        DataToFollow ->
+            [{<<"length">>, DiskLen}, {<<"follows">>, true}];
+        true ->
+            AttData = case Enc of
+                gzip -> zlib:gunzip(to_binary(Att));
+                identity -> to_binary(Att)
+            end,
+            [{<<"data">>, base64:encode(AttData)}]
+    end,
+    EncodingProps = if
+        ShowEncoding andalso Enc /= identity ->
+            [
+                {<<"encoding">>, couch_util:to_binary(Enc)},
+                {<<"encoded_length">>, AttLen}
+            ];
+        true ->
+            []
+    end,
+    HeadersProp = case fetch(headers, Att) of
+        undefined -> [];
+        Headers -> [{<<"headers">>, Headers}]
+    end,
+    {Name, {Props ++ DigestProp ++ DataProps ++ EncodingProps ++ HeadersProp}}.
+
+
+flush(Fd, Att) ->
+    flush_data(Fd, fetch(data, Att), Att).
+
+
+flush_data(Fd, {Fd0, _}, Att) when Fd0 == Fd ->
+    % already written to our file, nothing to write
+    Att;
+flush_data(Fd, {OtherFd, StreamPointer}, Att) ->
+    [InMd5, InDiskLen] = fetch([md5, disk_len], Att),
+    {NewStreamData, Len, _IdentityLen, Md5, IdentityMd5} =
+        couch_stream:copy_to_new_stream(OtherFd, StreamPointer, Fd),
+    couch_db:check_md5(IdentityMd5, InMd5),
+    store([
+        {data, {Fd, NewStreamData}},
+        {md5, Md5},
+        {att_len, Len},
+        {disk_len, InDiskLen}
+    ], Att);
+flush_data(Fd, Data, Att) when is_binary(Data) ->
+    couch_db:with_stream(Fd, Att, fun(OutputStream) ->
+        couch_stream:write(OutputStream, Data)
+    end);
+flush_data(Fd, Fun, Att) when is_function(Fun) ->
+    case fetch(att_len, Att) of
+        undefined ->
+            couch_db:with_stream(Fd, Att, fun(OutputStream) ->
+                % Fun(MaxChunkSize, WriterFun) must call WriterFun
+                % once for each chunk of the attachment,
+                Fun(4096,
+                    % WriterFun({Length, Binary}, State)
+                    % WriterFun({0, _Footers}, State)
+                    % Called with Length == 0 on the last time.
+                    % WriterFun returns NewState.
+                    fun({0, Footers}, _) ->
+                        F = mochiweb_headers:from_binary(Footers),
+                        case mochiweb_headers:get_value("Content-MD5", F) of
+                        undefined ->
+                            ok;
+                        Md5 ->
+                            {md5, base64:decode(Md5)}
+                        end;
+                    ({_Length, Chunk}, _) ->
+                        couch_stream:write(OutputStream, Chunk)
+                    end, ok)
+            end);
+        AttLen ->
+            couch_db:with_stream(Fd, Att, fun(OutputStream) ->
+                write_streamed_attachment(OutputStream, Fun, AttLen)
+            end)
+    end.
+
+
+write_streamed_attachment(_Stream, _F, 0) ->
+    ok;
+write_streamed_attachment(_Stream, _F, LenLeft) when LenLeft < 0 ->
+    throw({bad_request, <<"attachment longer than expected">>});
+write_streamed_attachment(Stream, F, LenLeft) when LenLeft > 0 ->
+    Bin = try read_next_chunk(F, LenLeft)
+    catch
+        {mp_parser_died, normal} ->
+            throw({bad_request, <<"attachment shorter than expected">>})
+    end,
+    ok = couch_stream:write(Stream, Bin),
+    write_streamed_attachment(Stream, F, LenLeft - size(Bin)).
+
+read_next_chunk(F, _) when is_function(F, 0) ->
+    F();
+read_next_chunk(F, LenLeft) when is_function(F, 1) ->
+    F(lists:min([LenLeft, 16#2000])).
+
+
+foldl(Att, Fun, Acc) ->
+    foldl(fetch(data, Att), Att, Fun, Acc).
+
+
+foldl(Bin, _Att, Fun, Acc) when is_binary(Bin) ->
+    Fun(Bin, Acc);
+foldl({Fd, Sp}, Att, Fun, Acc) when is_tuple(Sp) orelse Sp == null ->
+    % 09 UPGRADE CODE
+    Len = fetch(att_len, Att),
+    couch_stream:old_foldl(Fd, Sp, Len, Fun, Acc);
+foldl({Fd, Sp}, Att, Fun, Acc) ->
+    Md5 = fetch(md5, Att),
+    couch_stream:foldl(Fd, Sp, Md5, Fun, Acc);
+foldl(DataFun, Att, Fun, Acc) when is_function(DataFun) ->
+    Len = fetch(att_len, Att),
+    fold_streamed_data(DataFun, Len, Fun, Acc).
+
+
+range_foldl(Att, From, To, Fun, Acc) ->
+    {Fd, Sp} = fetch(data, Att),
+    couch_stream:range_foldl(Fd, Sp, From, To, Fun, Acc).
+
+
+foldl_decode(Att, Fun, Acc) ->
+    case fetch([data, encoding], Att) of
+        [{Fd, Sp}, Enc] ->
+            couch_stream:foldl_decode(Fd, Sp, fetch(md5, Att), Enc, Fun, Acc);
+        [Fun2, identity] ->
+            fold_streamed_data(Fun2, fetch(att_len, Att), Fun, Acc)
+    end.
+
+
+to_binary(Att) ->
+    to_binary(fetch(data, Att), Att).
+
+
+to_binary(Bin, _Att) when is_binary(Bin) ->
+    Bin;
+to_binary(Iolist, _Att) when is_list(Iolist) ->
+    iolist_to_binary(Iolist);
+to_binary({_Fd,_Sp}, Att) ->
+    iolist_to_binary(
+        lists:reverse(foldl(Att, fun(Bin,Acc) -> [Bin|Acc] end, []))
+    );
+to_binary(DataFun, Att) when is_function(DataFun) ->
+    Len = fetch(att_len, Att),
+    iolist_to_binary(
+        lists:reverse(fold_streamed_data(
+            DataFun,
+            Len,
+            fun(Data, Acc) -> [Data | Acc] end,
+            []
+        ))
+    ).
+
+
+fold_streamed_data(_RcvFun, 0, _Fun, Acc) ->
+    Acc;
+fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0 ->
+    Bin = RcvFun(),
+    ResultAcc = Fun(Bin, Acc),
+    fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc).
+
+
+%% Upgrade an attachment record to a property list on demand. This is a one-way
+%% operation as downgrading potentially truncates fields with important data.
+-spec upgrade(#att{}) -> attachment().
+upgrade(#att{} = Att) ->
+    Map = lists:zip(
+        record_info(fields, att),
+        lists:seq(2, record_info(size, att))
+    ),
+    %% Don't store undefined elements since that is default
+    [{F, element(I, Att)} || {F, I} <- Map, element(I, Att) /= undefined];
+upgrade(Att) ->
+    Att.
+
+
+%% Downgrade is exposed for interactive convenience. In practice, unless done
+%% manually, upgrades are always one-way.
+downgrade(#att{} = Att) ->
+    Att;
+downgrade(Att) ->
+    #att{
+        name = fetch(name, Att),
+        type = fetch(type, Att),
+        att_len = fetch(att_len, Att),
+        disk_len = fetch(disk_len, Att),
+        md5 = fetch(md5, Att),
+        revpos = fetch(revpos, Att),
+        data = fetch(data, Att),
+        encoding = fetch(encoding, Att)
+    }.
+
+
+upgrade_encoding(true) -> gzip;
+upgrade_encoding(false) -> identity;
+upgrade_encoding(Encoding) -> Encoding.
+
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+
+%% Test utilities
+
+
+empty_att() -> new().
+
+
+upgraded_empty_att() ->
+    new([{headers, undefined}]).
+
+
+%% Test groups
+
+
+attachment_upgrade_test_() ->
+    {"Lazy record upgrade tests", [
+        {"Existing record fields don't upgrade",
+            {with, empty_att(), [fun test_non_upgrading_fields/1]}
+        },
+        {"New fields upgrade",
+            {with, empty_att(), [fun test_upgrading_fields/1]}
+        }
+    ]}.
+
+
+attachment_defaults_test_() ->
+    {"Attachment defaults tests", [
+        {"Records retain old default values", [
+            {with, empty_att(), [fun test_legacy_defaults/1]}
+        ]},
+        {"Upgraded records inherit defaults", [
+            {with, upgraded_empty_att(), [fun test_legacy_defaults/1]}
+        ]},
+        {"Undefined entries are elided on upgrade", [
+            {with, upgraded_empty_att(), [fun test_elided_entries/1]}
+        ]}
+    ]}.
+
+attachment_field_api_test_() ->
+    {"Basic attachment field api", [
+        fun test_construction/0,
+        fun test_store_and_fetch/0,
+        fun test_transform/0
+    ]}.
+
+
+attachment_disk_term_test_() ->
+    BaseAttachment = new([
+        {name, <<"empty">>},
+        {type, <<"application/octet-stream">>},
+        {att_len, 0},
+        {disk_len, 0},
+        {md5, <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>},
+        {revpos, 4},
+        {data, {fake_fd, fake_sp}},
+        {encoding, identity}
+    ]),
+    BaseDiskTerm = {
+        <<"empty">>,
+        <<"application/octet-stream">>,
+        fake_sp,
+        0, 0, 4,
+        <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>,
+        identity
+    },
+    Headers = [{<<"X-Foo">>, <<"bar">>}],
+    ExtendedAttachment = store(headers, Headers, BaseAttachment),
+    ExtendedDiskTerm = {BaseDiskTerm, [{headers, Headers}]},
+    {"Disk term tests", [
+        ?_assertEqual(BaseDiskTerm, to_disk_term(BaseAttachment)),
+        ?_assertEqual(BaseAttachment, from_disk_term(fake_fd, BaseDiskTerm)),
+        ?_assertEqual(ExtendedDiskTerm, to_disk_term(ExtendedAttachment)),
+        ?_assertEqual(ExtendedAttachment, from_disk_term(fake_fd, ExtendedDiskTerm))
+    ]}.
+
+
+attachment_json_term_test_() ->
+    %% We need to create a few variations including stubs and inline data.
+    {"JSON term tests", []}.
+
+
+attachment_stub_merge_test_() ->
+    %% Stub merging needs to demonstrate revpos matching, skipping, and missing
+    %% attachment errors.
+    {"Attachment stub merging tests", []}.
+
+
+%% Test generators
+
+
+test_non_upgrading_fields(Attachment) ->
+    Pairs = [
+        {name, "cat.gif"},
+        {type, "text/very-very-plain"},
+        {att_len, 1024},
+        {disk_len, 42},
+        {md5, <<"md5-hashhashhash">>},
+        {revpos, 4},
+        {data, stub},
+        {encoding, gzip}
+    ],
+    lists:foreach(
+        fun({Field, Value}) ->
+            ?assertMatch(#att{}, Attachment),
+            Updated = store(Field, Value, Attachment),
+            ?assertMatch(#att{}, Updated)
+        end,
+    Pairs).
+
+
+test_upgrading_fields(Attachment) ->
+    ?assertMatch(#att{}, Attachment),
+    UpdatedHeaders = store(headers, [{<<"Ans">>, <<"42">>}], Attachment),
+    ?assertMatch(X when is_list(X), UpdatedHeaders),
+    UpdatedHeadersUndefined = store(headers, undefined, Attachment),
+    ?assertMatch(X when is_list(X), UpdatedHeadersUndefined).
+
+
+test_legacy_defaults(Attachment) ->
+    ?assertEqual(<<>>, fetch(md5, Attachment)),
+    ?assertEqual(0, fetch(revpos, Attachment)),
+    ?assertEqual(identity, fetch(encoding, Attachment)).
+
+
+test_elided_entries(Attachment) ->
+    ?assertNot(lists:keymember(name, 1, Attachment)),
+    ?assertNot(lists:keymember(type, 1, Attachment)),
+    ?assertNot(lists:keymember(att_len, 1, Attachment)),
+    ?assertNot(lists:keymember(disk_len, 1, Attachment)),
+    ?assertNot(lists:keymember(data, 1, Attachment)).
+
+
+test_construction() ->
+    ?assert(new() == new()),
+    Initialized = new([{name, <<"foo.bar">>}, {type, <<"application/qux">>}]),
+    ?assertEqual(<<"foo.bar">>, fetch(name, Initialized)),
+    ?assertEqual(<<"application/qux">>, fetch(type, Initialized)).
+
+
+test_store_and_fetch() ->
+    Attachment = empty_att(),
+    ?assertEqual(<<"abc">>, fetch(name, store(name, <<"abc">>, Attachment))),
+    ?assertEqual(42, fetch(ans, store(ans, 42, Attachment))).
+
+
+test_transform() ->
+    Attachment = new([{counter, 0}]),
+    Transformed = transform(counter, fun(Count) -> Count + 1 end, Attachment),
+    ?assertEqual(1, fetch(counter, Transformed)).
+
+
+-endif.
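
A note on the lazy upgrade above: storing a field the legacy #att{}
record already knows keeps the record form, while storing an extended
field (headers, as exercised in the tests) routes through upgrade/1 and
yields the property-list form. A sketch with made-up values:

    %% Sketch only; see store/3 and upgrade/1 in couch_att above.
    Att0 = couch_att:new(),                                % #att{} record
    Att1 = couch_att:store(md5, <<"digest">>, Att0),       % still a record
    Att2 = couch_att:store(headers, [{<<"X-Foo">>, <<"bar">>}], Att1),
    true = is_list(Att2).                                  % upgraded

On disk, the upgraded form is wrapped as {LegacyTuple, ExtendedProps},
as covered by attachment_disk_term_test_ above.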

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f3e45383/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index c9dc8e4..8a9f9e4 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -31,6 +31,7 @@
 -export([check_is_admin/1, check_is_member/1, get_doc_count/1]).
 -export([reopen/1, is_system_db/1, compression/1, make_doc/5]).
 -export([load_validation_funs/1]).
+-export([check_md5/2, with_stream/3]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -191,12 +192,16 @@ apply_open_options(Else,_Options) ->
 
 apply_open_options2(Doc,[]) ->
     {ok, Doc};
-apply_open_options2(#doc{atts=Atts,revs=Revs}=Doc,
+apply_open_options2(#doc{atts=Atts0,revs=Revs}=Doc,
         [{atts_since, PossibleAncestors}|Rest]) ->
     RevPos = find_ancestor_rev_pos(Revs, PossibleAncestors),
-    apply_open_options2(Doc#doc{atts=[A#att{data=
-        if AttPos>RevPos -> Data; true -> stub end}
-        || #att{revpos=AttPos,data=Data}=A <- Atts]}, Rest);
+    Atts = lists:map(fun(Att) ->
+        [AttPos, Data] = couch_att:fetch([revpos, data], Att),
+        if  AttPos > RevPos -> couch_att:store(data, Data, Att);
+            true -> couch_att:store(data, stub, Att)
+        end
+    end, Atts0),
+    apply_open_options2(Doc#doc{atts=Atts}, Rest);
 apply_open_options2(Doc, [ejson_body | Rest]) ->
     apply_open_options2(couch_doc:with_ejson_body(Doc), Rest);
 apply_open_options2(Doc,[_|Rest]) ->
@@ -784,15 +789,21 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI
 
 
 
-new_revid(#doc{body=Body,revs={OldStart,OldRevs},
-        atts=Atts,deleted=Deleted}) ->
-    case [{N, T, M} || #att{name=N,type=T,md5=M} <- Atts, M =/= <<>>] of
-    Atts2 when length(Atts) =/= length(Atts2) ->
-        % We must have old style non-md5 attachments
-        ?l2b(integer_to_list(couch_util:rand32()));
-    Atts2 ->
-        OldRev = case OldRevs of [] -> 0; [OldRev0|_] -> OldRev0 end,
-        couch_util:md5(term_to_binary([Deleted, OldStart, OldRev, Body, Atts2]))
+new_revid(#doc{body=Body, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) ->
+    DigestedAtts = lists:foldl(fun(Att, Acc) ->
+        [N, T, M] = couch_att:fetch([name, type, md5], Att),
+        case M == <<>> of
+            true -> Acc;
+            false -> [{N, T, M} | Acc]
+        end
+    end, [], Atts),
+    case DigestedAtts of
+        Atts2 when length(Atts) =/= length(Atts2) ->
+            % We must have old style non-md5 attachments
+            ?l2b(integer_to_list(couch_util:rand32()));
+        Atts2 ->
+            OldRev = case OldRevs of [] -> 0; [OldRev0|_] -> OldRev0 end,
+            couch_util:md5(term_to_binary([Deleted, OldStart, OldRev, Body, Atts2]))
     end.
 
 new_revs([], OutBuckets, IdRevsAcc) ->
@@ -807,18 +818,15 @@ new_revs([Bucket|RestBuckets], OutBuckets, IdRevsAcc) ->
     new_revs(RestBuckets, [NewBucket|OutBuckets], IdRevsAcc3).
 
 check_dup_atts(#doc{atts=Atts}=Doc) ->
-    Atts2 = lists:sort(fun(#att{name=N1}, #att{name=N2}) -> N1 < N2 end, Atts),
-    check_dup_atts2(Atts2),
+    lists:foldl(fun(Att, Names) ->
+        Name = couch_att:fetch(name, Att),
+        case ordsets:is_element(Name, Names) of
+            true -> throw({bad_request, <<"Duplicate attachments">>});
+            false -> ordsets:add_element(Name, Names)
+        end
+    end, ordsets:new(), Atts),
     Doc.
 
-check_dup_atts2([#att{name=N}, #att{name=N} | _]) ->
-    throw({bad_request, <<"Duplicate attachments">>});
-check_dup_atts2([_ | Rest]) ->
-    check_dup_atts2(Rest);
-check_dup_atts2(_) ->
-    ok.
-
-
 tag_docs([]) ->
     [];
 tag_docs([#doc{meta=Meta}=Doc | Rest]) ->
@@ -1000,11 +1008,10 @@ write_and_commit(#db{main_pid=Pid, user_ctx=Ctx}=Db, DocBuckets1,
 prepare_doc_summaries(Db, BucketList) ->
     [lists:map(
         fun(#doc{body = Body, atts = Atts} = Doc) ->
-            DiskAtts = [{N, T, P, AL, DL, R, M, E} ||
-                #att{name = N, type = T, data = {_, P}, md5 = M, revpos = R,
-                    att_len = AL, disk_len = DL, encoding = E} <- Atts],
+            DiskAtts = [couch_att:to_disk_term(Att) || Att <- Atts],
             AttsFd = case Atts of
-            [#att{data = {Fd, _}} | _] ->
+            [Att | _] ->
+                {Fd, _} = couch_att:fetch(data, Att),
                 Fd;
             [] ->
                 nil
@@ -1025,86 +1032,24 @@ before_docs_update(#db{before_doc_update = Fun} = Db, BucketList) ->
         Bucket) || Bucket <- BucketList].
 
 
-set_new_att_revpos(#doc{revs={RevPos,_Revs},atts=Atts}=Doc) ->
-    Doc#doc{atts= lists:map(fun(#att{data={_Fd,_Sp}}=Att) ->
-            % already commited to disk, do not set new rev
-            Att;
-        (Att) ->
-            Att#att{revpos=RevPos+1}
-        end, Atts)}.
+set_new_att_revpos(#doc{revs={RevPos,_Revs},atts=Atts0}=Doc) ->
+    Atts = lists:map(
+        fun(Att) ->
+            case couch_att:fetch(data, Att) of
+                {_Fd, _Sp} -> Att; % already committed to disk, don't set new rev
+                _ -> couch_att:store(revpos, RevPos+1, Att)
+            end
+        end, Atts0),
+    Doc#doc{atts = Atts}.
 
 
 doc_flush_atts(Doc, Fd) ->
-    Doc#doc{atts=[flush_att(Fd, Att) || Att <- Doc#doc.atts]}.
+    Doc#doc{atts=[couch_att:flush(Fd, Att) || Att <- Doc#doc.atts]}.
 
 check_md5(_NewSig, <<>>) -> ok;
 check_md5(Sig, Sig) -> ok;
 check_md5(_, _) -> throw(md5_mismatch).
 
-flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd ->
-    % already written to our file, nothing to write
-    Att;
-
-flush_att(Fd, #att{data={OtherFd,StreamPointer}, md5=InMd5,
-    disk_len=InDiskLen} = Att) ->
-    {NewStreamData, Len, _IdentityLen, Md5, IdentityMd5} =
-            couch_stream:copy_to_new_stream(OtherFd, StreamPointer, Fd),
-    check_md5(IdentityMd5, InMd5),
-    Att#att{data={Fd, NewStreamData}, md5=Md5, att_len=Len, disk_len=InDiskLen};
-
-flush_att(Fd, #att{data=Data}=Att) when is_binary(Data) ->
-    with_stream(Fd, Att, fun(OutputStream) ->
-        couch_stream:write(OutputStream, Data)
-    end);
-
-flush_att(Fd, #att{data=Fun,att_len=undefined}=Att) when is_function(Fun) ->
-    MaxChunkSize = list_to_integer(
-        config:get("couchdb", "attachment_stream_buffer_size", "4096")),
-    with_stream(Fd, Att, fun(OutputStream) ->
-        % Fun(MaxChunkSize, WriterFun) must call WriterFun
-        % once for each chunk of the attachment,
-        Fun(MaxChunkSize,
-            % WriterFun({Length, Binary}, State)
-            % WriterFun({0, _Footers}, State)
-            % Called with Length == 0 on the last time.
-            % WriterFun returns NewState.
-            fun({0, Footers}, _) ->
-                F = mochiweb_headers:from_binary(Footers),
-                case mochiweb_headers:get_value("Content-MD5", F) of
-                undefined ->
-                    ok;
-                Md5 ->
-                    {md5, base64:decode(Md5)}
-                end;
-            ({_Length, Chunk}, _) ->
-                couch_stream:write(OutputStream, Chunk)
-            end, ok)
-    end);
-
-flush_att(Fd, #att{data=Fun,att_len=AttLen}=Att) when is_function(Fun) ->
-    with_stream(Fd, Att, fun(OutputStream) ->
-        write_streamed_attachment(OutputStream, Fun, AttLen)
-    end);
-
-flush_att(Fd, #att{data={follows, Parser, Ref}}=Att) when is_pid(Parser) ->
-    ParserRef = erlang:monitor(process, Parser),
-    Fun = fun() ->
-        Parser ! {get_bytes, Ref, self()},
-        receive
-            {started_open_doc_revs, NewRef} ->
-                couch_doc:restart_open_doc_revs(Parser, Ref, NewRef);
-            {bytes, Ref, Bytes} ->
-                Bytes;
-            {'DOWN', ParserRef, _, _, Reason} ->
-                throw({mp_parser_died, Reason})
-        end
-    end,
-    try
-        flush_att(Fd, Att#att{data=Fun})
-    after
-        erlang:demonitor(ParserRef, [flush])
-    end.
-
 
 compressible_att_type(MimeType) when is_binary(MimeType) ->
     compressible_att_type(?b2l(MimeType));
@@ -1133,7 +1078,8 @@ compressible_att_type(MimeType) ->
 % is present in the request, but there is no Content-MD5
 % trailer, we're free to ignore this inconsistency and
 % pretend that no Content-MD5 exists.
-with_stream(Fd, #att{md5=InMd5,type=Type,encoding=Enc}=Att, Fun) ->
+with_stream(Fd, Att, Fun) ->
+    [InMd5, Type, Enc] = couch_att:fetch([md5, type, encoding], Att),
     BufferSize = list_to_integer(
         config:get("couchdb", "attachment_stream_buffer_size", "4096")),
     {ok, OutputStream} = case (Enc =:= identity) andalso
@@ -1168,43 +1114,25 @@ with_stream(Fd, #att{md5=InMd5,type=Type,encoding=Enc}=Att, Fun) ->
             {Len, IdentityLen, gzip}
         end;
     gzip ->
-        case {Att#att.att_len, Att#att.disk_len} of
-        {AL, DL} when AL =:= undefined orelse DL =:= undefined ->
-            % Compressed attachment uploaded through the standalone API.
-            {Len, Len, gzip};
-        {AL, DL} ->
-            % This case is used for efficient push-replication, where a
-            % compressed attachment is located in the body of multipart
-            % content-type request.
-            {AL, DL, gzip}
+        case couch_att:fetch([att_len, disk_len], Att) of
+            [AL, DL] when AL =:= undefined orelse DL =:= undefined ->
+                % Compressed attachment uploaded through the standalone API.
+                {Len, Len, gzip};
+            [AL, DL] ->
+                % This case is used for efficient push-replication, where a
+                % compressed attachment is located in the body of multipart
+                % content-type request.
+                {AL, DL, gzip}
         end
     end,
-    Att#att{
-        data={Fd,StreamInfo},
-        att_len=AttLen,
-        disk_len=DiskLen,
-        md5=Md5,
-        encoding=NewEnc
-    }.
-
-
-write_streamed_attachment(_Stream, _F, 0) ->
-    ok;
-write_streamed_attachment(_Stream, _F, LenLeft) when LenLeft < 0 ->
-    throw({bad_request, <<"attachment longer than expected">>});
-write_streamed_attachment(Stream, F, LenLeft) when LenLeft > 0 ->
-    Bin = try read_next_chunk(F, LenLeft)
-    catch
-        {mp_parser_died, normal} ->
-            throw({bad_request, <<"attachment shorter than expected">>})
-    end,
-    ok = couch_stream:write(Stream, Bin),
-    write_streamed_attachment(Stream, F, LenLeft - size(Bin)).
+    couch_att:store([
+        {data, {Fd,StreamInfo}},
+        {att_len, AttLen},
+        {disk_len, DiskLen},
+        {md5, Md5},
+        {encoding, NewEnc}
+    ], Att).
 
-read_next_chunk(F, _) when is_function(F, 0) ->
-    F();
-read_next_chunk(F, LenLeft) when is_function(F, 1) ->
-    F(lists:min([LenLeft, 16#2000])).
 
 enum_docs_since_reduce_to_count(Reds) ->
     couch_btree:final_reduce(
@@ -1367,60 +1295,28 @@ read_doc(#db{fd=Fd}, Pos) ->
     couch_file:pread_term(Fd, Pos).
 
 
-make_doc(#db{fd = Fd} = Db, Id, Deleted, Bp, RevisionPath) ->
-    {BodyData, Atts} =
-    case Bp of
-    nil ->
-        {[], []};
-    _ ->
-        {ok, {BodyData0, Atts00}} = read_doc(Db, Bp),
-        Atts0 = case Atts00 of
-        _ when is_binary(Atts00) ->
-            couch_compress:decompress(Atts00);
-        _ when is_list(Atts00) ->
-            % pre 1.2 format
-            Atts00
-        end,
-        {BodyData0,
-            lists:map(
-                fun({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) ->
-                    #att{name=Name,
-                        type=Type,
-                        att_len=AttLen,
-                        disk_len=DiskLen,
-                        md5=Md5,
-                        revpos=RevPos,
-                        data={Fd,Sp},
-                        encoding=
-                            case Enc of
-                            true ->
-                                % 0110 UPGRADE CODE
-                                gzip;
-                            false ->
-                                % 0110 UPGRADE CODE
-                                identity;
-                            _ ->
-                                Enc
-                            end
-                    };
-                ({Name,Type,Sp,AttLen,RevPos,Md5}) ->
-                    #att{name=Name,
-                        type=Type,
-                        att_len=AttLen,
-                        disk_len=AttLen,
-                        md5=Md5,
-                        revpos=RevPos,
-                        data={Fd,Sp}};
-                ({Name,{Type,Sp,AttLen}}) ->
-                    #att{name=Name,
-                        type=Type,
-                        att_len=AttLen,
-                        disk_len=AttLen,
-                        md5= <<>>,
-                        revpos=0,
-                        data={Fd,Sp}}
-                end, Atts0)}
+make_doc(_Db, Id, Deleted, nil = _Bp, RevisionPath) ->
+    #doc{
+        id = Id,
+        revs = RevisionPath,
+        body = [],
+        atts = [],
+        deleted = Deleted
+    };
+make_doc(#db{fd=Fd}=Db, Id, Deleted, Bp, RevisionPath) ->
+    {BodyData, Atts0} = case Bp of
+        nil ->
+            {[], []};
+        _ ->
+            case read_doc(Db, Bp) of
+                {ok, {BodyData0, Atts1}} when is_binary(Atts1) ->
+                    {BodyData0, couch_compress:decompress(Atts1)};
+                {ok, {BodyData0, Atts1}} when is_list(Atts1) ->
+                    % pre 1.2 format
+                    {BodyData0, Atts1}
+            end
     end,
+    Atts = [couch_att:from_disk_term(Fd, T) || T <- Atts0],
     Doc = #doc{
         id = Id,
         revs = RevisionPath,
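
Since couch_db now exports check_md5/2 and with_stream/3 for couch_att's
benefit, it is worth spelling out check_md5's small contract (example
values are arbitrary):

    %% Sketch: an empty expected digest skips verification, equal
    %% digests pass, and anything else throws md5_mismatch.
    ok = couch_db:check_md5(<<"anything">>, <<>>),
    ok = couch_db:check_md5(<<"same">>, <<"same">>).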

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f3e45383/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 2d326f6..88fa3b3 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -591,7 +591,7 @@ flush_trees(#db{fd = Fd} = Db,
                 {ok, NewSummaryPointer, SummarySize} =
                     couch_file:append_raw_chunk(Fd, Summary),
                 TotalSize = lists:foldl(
-                    fun(#att{att_len = L}, A) -> A + L end,
+                    fun(Att, A) -> A + couch_att:fetch(att_len, Att) end,
                     SummarySize, Value#doc.atts),
                 NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
                                  seq=UpdateSeq, size=TotalSize},

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f3e45383/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index d82d626..74450a4 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -13,9 +13,8 @@
 -module(couch_doc).
 
 -export([to_doc_info/1,to_doc_info_path/1,parse_rev/1,parse_revs/1,rev_to_str/1,revs_to_strs/1]).
--export([att_foldl/3,range_att_foldl/5,att_foldl_decode/3,get_validate_doc_fun/1]).
 -export([from_json_obj/1,to_json_obj/2,has_stubs/1, merge_stubs/2]).
--export([validate_docid/1]).
+-export([validate_docid/1, get_validate_doc_fun/1]).
 -export([doc_from_multi_part_stream/2, doc_from_multi_part_stream/3]).
 -export([doc_to_multi_part_stream/5, len_doc_to_multi_part_stream/4]).
 -export([abort_multi_part_stream/1, restart_open_doc_revs/3]).
@@ -98,50 +97,11 @@ to_json_attachments(Attachments, Options) ->
         lists:member(att_encoding_info, Options)
     ).
 
-to_json_attachments([], _OutputData, _DataToFollow, _ShowEncInfo) ->
+to_json_attachments([], _OutputData, _Follows, _ShowEnc) ->
     [];
-to_json_attachments(Atts, OutputData, DataToFollow, ShowEncInfo) ->
-    AttProps = lists:map(
-        fun(#att{disk_len=DiskLen, att_len=AttLen, encoding=Enc}=Att) ->
-            {Att#att.name, {[
-                {<<"content_type">>, Att#att.type},
-                {<<"revpos">>, Att#att.revpos}] ++
-                case Att#att.md5 of
-                    <<>> ->
-                        [];
-                    Md5 ->
-                        EncodedMd5 = base64:encode(Md5),
-                        [{<<"digest">>, <<"md5-",EncodedMd5/binary>>}]
-                end ++
-                if not OutputData orelse Att#att.data == stub ->
-                    [{<<"length">>, DiskLen}, {<<"stub">>, true}];
-                true ->
-                    if DataToFollow ->
-                        [{<<"length">>, DiskLen}, {<<"follows">>, true}];
-                    true ->
-                        AttData = case Enc of
-                        gzip ->
-                            zlib:gunzip(att_to_bin(Att));
-                        identity ->
-                            att_to_bin(Att)
-                        end,
-                        [{<<"data">>, base64:encode(AttData)}]
-                    end
-                end ++
-                    case {ShowEncInfo, Enc} of
-                    {false, _} ->
-                        [];
-                    {true, identity} ->
-                        [];
-                    {true, _} ->
-                        [
-                            {<<"encoding">>, couch_util:to_binary(Enc)},
-                            {<<"encoded_length">>, AttLen}
-                        ]
-                    end
-            }}
-        end, Atts),
-    [{<<"_attachments">>, {AttProps}}].
+to_json_attachments(Atts, OutputData, Follows, ShowEnc) ->
+    Props = [couch_att:to_json(A, OutputData, Follows, ShowEnc) || A <- Atts],
+    [{<<"_attachments">>, {Props}}].
 
 to_json_obj(Doc, Options) ->
     doc_to_json_obj(with_ejson_body(Doc), Options).
@@ -227,40 +187,7 @@ transfer_fields([{<<"_rev">>, _Rev} | Rest], Doc) ->
     transfer_fields(Rest,Doc);
 
 transfer_fields([{<<"_attachments">>, {JsonBins}} | Rest], Doc) ->
-    Atts = lists:map(fun({Name, {BinProps}}) ->
-        Md5 = case couch_util:get_value(<<"digest">>, BinProps) of
-            <<"md5-",EncodedMd5/binary>> ->
-                base64:decode(EncodedMd5);
-            _ ->
-               <<>>
-        end,
-        case couch_util:get_value(<<"stub">>, BinProps) of
-        true ->
-            Type = couch_util:get_value(<<"content_type">>, BinProps),
-            RevPos = couch_util:get_value(<<"revpos">>, BinProps, nil),
-            DiskLen = couch_util:get_value(<<"length">>, BinProps),
-            {Enc, EncLen} = att_encoding_info(BinProps),
-            #att{name=Name, data=stub, type=Type, att_len=EncLen,
-                disk_len=DiskLen, encoding=Enc, revpos=RevPos, md5=Md5};
-        _ ->
-            Type = couch_util:get_value(<<"content_type">>, BinProps,
-                    ?DEFAULT_ATTACHMENT_CONTENT_TYPE),
-            RevPos = couch_util:get_value(<<"revpos">>, BinProps, 0),
-            case couch_util:get_value(<<"follows">>, BinProps) of
-            true ->
-                DiskLen = couch_util:get_value(<<"length">>, BinProps),
-                {Enc, EncLen} = att_encoding_info(BinProps),
-                #att{name=Name, data=follows, type=Type, encoding=Enc,
-                    att_len=EncLen, disk_len=DiskLen, revpos=RevPos, md5=Md5};
-            _ ->
-                Value = couch_util:get_value(<<"data">>, BinProps),
-                Bin = base64:decode(Value),
-                LenBin = size(Bin),
-                #att{name=Name, data=Bin, type=Type, att_len=LenBin,
-                        disk_len=LenBin, revpos=RevPos}
-            end
-        end
-    end, JsonBins),
+    Atts = [couch_att:from_json(Name, Props) || {Name, {Props}} <- JsonBins],
     transfer_fields(Rest, Doc#doc{atts=Atts});
 
 transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc) ->
@@ -316,16 +243,6 @@ transfer_fields([{<<"_",Name/binary>>, _} | _], _) ->
 transfer_fields([Field | Rest], #doc{body=Fields}=Doc) ->
     transfer_fields(Rest, Doc#doc{body=[Field|Fields]}).
 
-att_encoding_info(BinProps) ->
-    DiskLen = couch_util:get_value(<<"length">>, BinProps),
-    case couch_util:get_value(<<"encoding">>, BinProps) of
-    undefined ->
-        {identity, DiskLen};
-    Enc ->
-        EncodedLen = couch_util:get_value(<<"encoded_length">>, BinProps, DiskLen),
-        {list_to_existing_atom(?b2l(Enc)), EncodedLen}
-    end.
-
 to_doc_info(FullDocInfo) ->
     {DocInfo, _Path} = to_doc_info_path(FullDocInfo),
     DocInfo.
@@ -387,61 +304,6 @@ is_deleted(Tree) ->
     end.
 
 
-att_foldl(#att{data=Bin}, Fun, Acc) when is_binary(Bin) ->
-    Fun(Bin, Acc);
-att_foldl(#att{data={Fd,Sp},md5=Md5}, Fun, Acc) ->
-    couch_stream:foldl(Fd, Sp, Md5, Fun, Acc);
-att_foldl(#att{data=DataFun,att_len=Len}, Fun, Acc) when is_function(DataFun) ->
-   fold_streamed_data(DataFun, Len, Fun, Acc);
-att_foldl(#att{data={follows, Parser, Ref}}=Att, Fun, Acc) ->
-    ParserRef = erlang:monitor(process, Parser),
-    DataFun = fun() ->
-        Parser ! {get_bytes, Ref, self()},
-        receive
-            {started_open_doc_revs, NewRef} ->
-                couch_doc:restart_open_doc_revs(Parser, Ref, NewRef);
-            {bytes, Ref, Bytes} ->
-                Bytes;
-            {'DOWN', ParserRef, _, _, Reason} ->
-                throw({mp_parser_died, Reason})
-        end
-    end,
-    try
-        att_foldl(Att#att{data=DataFun}, Fun, Acc)
-    after
-        erlang:demonitor(ParserRef, [flush])
-    end.
-
-range_att_foldl(#att{data={Fd,Sp}}, From, To, Fun, Acc) ->
-   couch_stream:range_foldl(Fd, Sp, From, To, Fun, Acc).
-
-att_foldl_decode(#att{data={Fd,Sp},md5=Md5,encoding=Enc}, Fun, Acc) ->
-    couch_stream:foldl_decode(Fd, Sp, Md5, Enc, Fun, Acc);
-att_foldl_decode(#att{data=Fun2,att_len=Len, encoding=identity}, Fun, Acc) ->
-       fold_streamed_data(Fun2, Len, Fun, Acc).
-
-att_to_bin(#att{data=Bin}) when is_binary(Bin) ->
-    Bin;
-att_to_bin(#att{data=Iolist}) when is_list(Iolist) ->
-    iolist_to_binary(Iolist);
-att_to_bin(#att{data={_Fd,_Sp}}=Att) ->
-    iolist_to_binary(
-        lists:reverse(att_foldl(
-                Att,
-                fun(Bin,Acc) -> [Bin|Acc] end,
-                []
-        ))
-    );
-att_to_bin(#att{data=DataFun, att_len=Len}) when is_function(DataFun)->
-    iolist_to_binary(
-        lists:reverse(fold_streamed_data(
-            DataFun,
-            Len,
-            fun(Data, Acc) -> [Data | Acc] end,
-            []
-        ))
-    ).
-
 get_validate_doc_fun({Props}) ->
     get_validate_doc_fun(couch_doc:from_json_obj({Props}));
 get_validate_doc_fun(#doc{body={Props}}=DDoc) ->
@@ -456,50 +318,26 @@ get_validate_doc_fun(#doc{body={Props}}=DDoc) ->
 
 
 has_stubs(#doc{atts=Atts}) ->
-    has_stubs(Atts);
-has_stubs([]) ->
-    false;
-has_stubs([#att{data=stub}|_]) ->
-    true;
-has_stubs([_Att|Rest]) ->
-    has_stubs(Rest).
+    lists:any(fun couch_att:is_stub/1, Atts);
+has_stubs(Atts) ->
+    lists:any(fun couch_att:is_stub/1, Atts).
 
 merge_stubs(#doc{id = Id}, nil) ->
     throw({missing_stub, <<"Previous revision missing for document ", Id/binary>>});
 merge_stubs(#doc{id=Id,atts=MemBins}=StubsDoc, #doc{atts=DiskBins}) ->
-    BinDict = dict:from_list([{Name, Att} || #att{name=Name}=Att <- DiskBins]),
-    MergedBins = lists:map(
-        fun(#att{name=Name, data=stub, revpos=StubRevPos}) ->
-            case dict:find(Name, BinDict) of
-            {ok, #att{revpos=DiskRevPos}=DiskAtt}
-                    when DiskRevPos == StubRevPos orelse StubRevPos == nil ->
-                DiskAtt;
-            _ ->
-                throw({missing_stub,
-                        <<"id:", Id/binary, ", name:", Name/binary>>})
-            end;
-        (Att) ->
-            Att
-        end, MemBins),
-    StubsDoc#doc{atts= MergedBins}.
-
-fold_streamed_data(_RcvFun, 0, _Fun, Acc) ->
-    Acc;
-fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0->
-    Bin = RcvFun(),
-    ResultAcc = Fun(Bin, Acc),
-    fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc).
+    case couch_att:merge_stubs(MemBins, DiskBins) of
+        {ok, MergedBins} ->
+            StubsDoc#doc{atts = MergedBins};
+        {missing, Name} ->
+            throw({missing_stub,
+                <<"Invalid attachment stub in ", Id/binary, " for ", Name/binary>>
+            })
+    end.
 
 len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, SendEncodedAtts) ->
     AttsSize = lists:foldl(fun(Att, AccAttsSize) ->
-            #att{
-                data=Data,
-                name=Name,
-                att_len=AttLen,
-                disk_len=DiskLen,
-                type=Type,
-                encoding=Encoding
-            } = Att,
+            [Data, Name, AttLen, DiskLen, Type, Encoding] =
+                 couch_att:fetch([data, name, att_len, disk_len, type, encoding], Att),
             case Data of
             stub ->
                 AccAttsSize;
@@ -552,7 +390,7 @@ len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, SendEncodedAtts) ->
 
 doc_to_multi_part_stream(Boundary, JsonBytes, Atts, WriteFun,
     SendEncodedAtts) ->
-    case lists:any(fun(#att{data=Data})-> Data /= stub end, Atts) of
+    case lists:any(fun(Att)-> couch_att:fetch(data, Att) /= stub end, Atts) of
     true ->
         WriteFun([<<"--", Boundary/binary,
                 "\r\nContent-Type: application/json\r\n\r\n">>,
@@ -564,46 +402,39 @@ doc_to_multi_part_stream(Boundary, JsonBytes, Atts, WriteFun,
 
 atts_to_mp([], _Boundary, WriteFun, _SendEncAtts) ->
     WriteFun(<<"--">>);
-atts_to_mp([#att{data=stub} | RestAtts], Boundary, WriteFun,
-        SendEncodedAtts) ->
-    atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts);
-atts_to_mp([Att | RestAtts], Boundary, WriteFun,
-        SendEncodedAtts)  ->
-    #att{
-        name=Name,
-        att_len=AttLen,
-        disk_len=DiskLen,
-        type=Type,
-        encoding=Encoding
-    } = Att,
-
-    % write headers
-    LengthBin = case SendEncodedAtts of
-    true -> list_to_binary(integer_to_list(AttLen));
-    false -> list_to_binary(integer_to_list(DiskLen))
-    end,
-    WriteFun(<<"\r\nContent-Disposition: attachment; filename=\"", Name/binary, "\"">>),
-    WriteFun(<<"\r\nContent-Type: ", Type/binary>>),
-    WriteFun(<<"\r\nContent-Length: ", LengthBin/binary>>),
-    case Encoding of
-    identity ->
-        ok;
-    _ ->
-        EncodingBin = atom_to_binary(Encoding, latin1),
-        WriteFun(<<"\r\nContent-Encoding: ", EncodingBin/binary>>)
-    end,
-
-    % write data
-    WriteFun(<<"\r\n\r\n">>),
-    AttFun = case SendEncodedAtts of
-    false ->
-        fun att_foldl_decode/3;
-    true ->
-        fun att_foldl/3
-    end,
-    AttFun(Att, fun(Data, _) -> WriteFun(Data) end, ok),
-    WriteFun(<<"\r\n--", Boundary/binary>>),
-    atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts).
+atts_to_mp([Att | RestAtts], Boundary, WriteFun, SendEncodedAtts)  ->
+    case couch_att:is_stub(Att) of
+        true ->
+            atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts);
+        false ->
+            [Name, AttLen, DiskLen, Type, Encoding] =
+                couch_att:fetch([name, att_len, disk_len, type, encoding], Att),
+            % write headers
+            LengthBin = case SendEncodedAtts of
+                true  -> list_to_binary(integer_to_list(AttLen));
+                false -> list_to_binary(integer_to_list(DiskLen))
+            end,
+            WriteFun(<<"\r\nContent-Disposition: attachment; filename=\"", Name/binary, "\"">>),
+            WriteFun(<<"\r\nContent-Type: ", Type/binary>>),
+            WriteFun(<<"\r\nContent-Length: ", LengthBin/binary>>),
+            case Encoding of
+                identity ->
+                    ok;
+                _ ->
+                    EncodingBin = atom_to_binary(Encoding, latin1),
+                    WriteFun(<<"\r\nContent-Encoding: ", EncodingBin/binary>>)
+            end,
+
+            % write data
+            WriteFun(<<"\r\n\r\n">>),
+            AttFun = case SendEncodedAtts of
+                false -> fun couch_att:foldl_decode/3;
+                true  -> fun couch_att:foldl/3
+            end,
+            AttFun(Att, fun(Data, _) -> WriteFun(Data) end, ok),
+            WriteFun(<<"\r\n--", Boundary/binary>>),
+            atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts)
+    end.
 
 
 doc_from_multi_part_stream(ContentType, DataFun) ->
@@ -630,17 +461,13 @@ doc_from_multi_part_stream(ContentType, DataFun, Ref) ->
         Doc = from_json_obj(?JSON_DECODE(DocBytes)),
         % we'll send the Parser process ID to the remote nodes so they can
         % retrieve their own copies of the attachment data
-        Atts2 = lists:map(
-            fun(#att{data=follows}=A) ->
-                A#att{data={follows, Parser, Ref}};
-            (A) ->
-                A
-            end, Doc#doc.atts),
+        WithParser = fun(follows) -> {follows, Parser, Ref}; (D) -> D end,
+        Atts = [couch_att:transform(data, WithParser, A) || A <- Doc#doc.atts],
         WaitFun = fun() ->
             receive {'DOWN', ParserRef, _, _, _} -> ok end,
             erlang:put(mochiweb_request_recv, true)
         end,
-        {ok, Doc#doc{atts=Atts2}, WaitFun, Parser};
+        {ok, Doc#doc{atts=Atts}, WaitFun, Parser};
     {'DOWN', ParserRef, _, _, normal} ->
         ok;
     {'DOWN', ParserRef, process, Parser, {{nocatch, {Error, Msg}}, _}} ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f3e45383/src/couch_httpd_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_db.erl b/src/couch_httpd_db.erl
index 8689944..77d8788 100644
--- a/src/couch_httpd_db.erl
+++ b/src/couch_httpd_db.erl
@@ -547,16 +547,19 @@ db_doc_req(#httpd{method='POST'}=Req, Db, DocId) ->
         Doc = couch_doc_from_req(Req, DocId, ?JSON_DECODE(Json))
     end,
     UpdatedAtts = [
-        #att{name=validate_attachment_name(Name),
-            type=list_to_binary(ContentType),
-            data=Content} ||
+        couch_att:new([
+            {name, validate_attachment_name(Name)},
+            {type, list_to_binary(ContentType)},
+            {data, Content}
+        ]) ||
         {Name, {ContentType, _}, Content} <-
         proplists:get_all_values("_attachments", Form)
     ],
     #doc{atts=OldAtts} = Doc,
     OldAtts2 = lists:flatmap(
-        fun(#att{name=OldName}=Att) ->
-            case [1 || A <- UpdatedAtts, A#att.name == OldName] of
+        fun(Att) ->
+            OldName = couch_att:fetch(name, Att),
+            case [1 || A <- UpdatedAtts, couch_att:fetch(name, A) == OldName] of
             [] -> [Att]; % the attachment wasn't in the UpdatedAtts, return it
             _ -> [] % the attachment was in the UpdatedAtts, drop it
             end
@@ -684,7 +687,7 @@ send_ranges_multipart(Req, ContentType, Len, Att, Ranges) ->
             <<"\r\nContent-Type: ", ContentType/binary, "\r\n",
             "Content-Range: ", ContentRange/binary, "\r\n",
            "\r\n">>),
-        couch_doc:range_att_foldl(Att, From, To + 1,
+        couch_att:range_foldl(Att, From, To + 1,
             fun(Seg, _) -> send_chunk(Resp, Seg) end, {ok, Resp}),
         couch_httpd:send_chunk(Resp, <<"\r\n--", Boundary/binary>>)
     end, Ranges),
@@ -833,13 +836,14 @@ db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNa
     #doc{
         atts=Atts
     } = Doc = couch_doc_open(Db, DocId, Rev, Options),
-    case [A || A <- Atts, A#att.name == FileName] of
+    case [A || A <- Atts, couch_att:fetch(name, A) == FileName] of
     [] ->
         throw({not_found, "Document is missing attachment"});
-    [#att{type=Type, encoding=Enc, disk_len=DiskLen, att_len=AttLen}=Att] ->
-        Etag = case Att#att.md5 of
+    [Att] ->
+        [Type, Enc, DiskLen, AttLen, Md5] = couch_att:fetch([type, encoding, disk_len, att_len, md5], Att),
+        Etag = case Md5 of
             <<>> -> couch_httpd:doc_etag(Doc);
-            Md5 -> "\"" ++ ?b2l(base64:encode(Md5)) ++ "\""
+            _ -> "\"" ++ ?b2l(base64:encode(Md5)) ++ "\""
         end,
         ReqAcceptsAttEnc = lists:member(
            atom_to_list(Enc),
@@ -883,9 +887,9 @@ db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNa
         end,
         AttFun = case ReqAcceptsAttEnc of
         false ->
-            fun couch_doc:att_foldl_decode/3;
+            fun couch_att:foldl_decode/3;
         true ->
-            fun couch_doc:att_foldl/3
+            fun couch_att:foldl/3
         end,
         couch_httpd:etag_respond(
             Req,
@@ -903,14 +907,14 @@ db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNa
                             Headers1 = [{"Content-Range", make_content_range(From, To, Len)}]
                                 ++ Headers,
                             {ok, Resp} = start_response_length(Req, 206, Headers1, To - From + 1),
-                            couch_doc:range_att_foldl(Att, From, To + 1,
+                            couch_att:range_foldl(Att, From, To + 1,
                                 fun(Seg, _) -> send(Resp, Seg) end, {ok, Resp});
                         {identity, Ranges} when is_list(Ranges) andalso length(Ranges) < 10 ->
                             send_ranges_multipart(Req, Type, Len, Att, Ranges);
                         _ ->
                             Headers1 = Headers ++
                                 if Enc =:= identity orelse ReqAcceptsAttEnc =:= true ->
-                                    [{"Content-MD5", base64:encode(Att#att.md5)}];
+                                    [{"Content-MD5", base64:encode(Md5)}];
                                 true ->
                                     []
                             end,
@@ -929,60 +933,54 @@ db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileN
                     mochiweb_util:join(
                         lists:map(fun binary_to_list/1,
                             FileNameParts),"/")),
-
     NewAtt = case Method of
         'DELETE' ->
             [];
         _ ->
-            [#att{
-                name = FileName,
-                type = case couch_httpd:header_value(Req,"Content-Type") of
-                    undefined ->
-                        % We could throw an error here or guess by the FileName.
-                        % Currently, just giving it a default.
-                        <<"application/octet-stream">>;
-                    CType ->
-                        list_to_binary(CType)
+            MimeType = case couch_httpd:header_value(Req,"Content-Type") of
+                % We could throw an error here or guess by the FileName.
+                % Currently, just giving it a default.
+                undefined -> <<"application/octet-stream">>;
+                CType -> list_to_binary(CType)
+            end,
+            Data = case couch_httpd:body_length(Req) of
+                undefined ->
+                    <<"">>;
+                {unknown_transfer_encoding, Unknown} ->
+                    exit({unknown_transfer_encoding, Unknown});
+                chunked ->
+                    fun(MaxChunkSize, ChunkFun, InitState) ->
+                        couch_httpd:recv_chunked(
+                            Req, MaxChunkSize, ChunkFun, InitState
+                        )
+                    end;
+                0 ->
+                    <<"">>;
+                Length when is_integer(Length) ->
+                    Expect = case couch_httpd:header_value(Req, "expect") of
+                        undefined ->
+                            undefined;
+                        Value when is_list(Value) ->
+                            string:to_lower(Value)
                     end,
-                data = case couch_httpd:body_length(Req) of
-                    undefined ->
-                        <<"">>;
-                    {unknown_transfer_encoding, Unknown} ->
-                        exit({unknown_transfer_encoding, Unknown});
-                    chunked ->
-                        fun(MaxChunkSize, ChunkFun, InitState) ->
-                            couch_httpd:recv_chunked(Req, MaxChunkSize,
-                                ChunkFun, InitState)
-                        end;
-                    0 ->
-                        <<"">>;
-                    Length when is_integer(Length) ->
-                        Expect = case couch_httpd:header_value(Req, "expect") of
-                                     undefined ->
-                                         undefined;
-                                     Value when is_list(Value) ->
-                                         string:to_lower(Value)
-                                 end,
-                        case Expect of
-                            "100-continue" ->
-                                MochiReq:start_raw_response({100, gb_trees:empty()});
-                            _Else ->
-                                ok
-                        end,
-
-
-                        fun(Size) -> couch_httpd:recv(Req, Size) end
+                    case Expect of
+                        "100-continue" ->
+                            MochiReq:start_raw_response({100, gb_trees:empty()});
+                        _Else ->
+                            ok
                     end,
-                att_len = case couch_httpd:header_value(Req,"Content-Length") of
-                    undefined ->
-                        undefined;
-                    Length ->
-                        list_to_integer(Length)
-                    end,
-                md5 = get_md5_header(Req),
-                encoding = case string:to_lower(string:strip(
-                    couch_httpd:header_value(Req,"Content-Encoding","identity")
-                )) of
+                    fun() -> couch_httpd:recv(Req, 0) end;
+                Length ->
+                    exit({length_not_integer, Length})
+            end,
+            AttLen = case couch_httpd:header_value(Req,"Content-Length") of
+                undefined -> undefined;
+                Len -> list_to_integer(Len)
+            end,
+            ContentEnc = string:to_lower(string:strip(
+                couch_httpd:header_value(Req,"Content-Encoding","identity")
+            )),
+            Encoding = case ContentEnc of
                 "identity" ->
                    identity;
                 "gzip" ->
@@ -992,8 +990,15 @@ db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileN
                        bad_ctype,
                        "Only gzip and identity content-encodings are supported"
                    })
-                end
-            }]
+            end,
+            [couch_att:new([
+                {name, FileName},
+                {type, MimeType},
+                {data, Data},
+                {att_len, AttLen},
+                {md5, get_md5_header(Req)},
+                {encoding, Encoding}
+            ])]
     end,
 
     Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of
@@ -1010,7 +1015,7 @@ db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileN
 
     #doc{atts=Atts} = Doc,
     DocEdited = Doc#doc{
-        atts = NewAtt ++ [A || A <- Atts, A#att.name /= FileName]
+        atts = NewAtt ++ [A || A <- Atts, couch_att:fetch(name, A) /= FileName]
     },
 
     Headers = case Method of
@@ -1216,7 +1221,8 @@ parse_copy_destination_header(Req) ->
     end.
 
 validate_attachment_names(Doc) ->
-    lists:foreach(fun(#att{name=Name}) ->
+    lists:foreach(fun(Att) ->
+        Name = couch_att:fetch(name, Att),
         validate_attachment_name(Name)
     end, Doc#doc.atts).
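
For reference, a minimal sketch (not part of the patch) of how the
couch_att accessors used in the hunks above fit together; the attachment
name and contents here are made up:

    example() ->
        Att0 = couch_att:new([
            {name, <<"body.txt">>},
            {type, <<"text/plain">>},
            {data, <<"Hello, world!">>}
        ]),
        %% fetch/2 accepts a single field name or a list of names
        Name = couch_att:fetch(name, Att0),
        [Type, Data] = couch_att:fetch([type, data], Att0),
        %% transform/3 applies a fun to one field, returning the new att
        Att1 = couch_att:transform(type, fun(T) -> T end, Att0),
        {Name, Type, Data, Att1}.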
 


[05/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
[WIP] Add an event for each updated design doc

This sends a separate event for each updated design doc so that the
event can include the document id.



Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/439db650
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/439db650
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/439db650

Branch: refs/heads/windsor-merge
Commit: 439db650336a7b7b71ff60d150865341efdd162a
Parents: 5a2da10
Author: Paul J. Davis <pa...@gmail.com>
Authored: Thu Aug 7 13:34:49 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_db_updater.erl | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/439db650/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 504c21e..2c1e808 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -289,6 +289,13 @@ handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts,
         [catch(ClientPid ! {done, self()}) || ClientPid <- Clients],
         Db3 = case length(UpdatedDDocIds) > 0 of
             true ->
+                % Ken and ddoc_cache are the only things that
+                % use the unspecified ddoc_updated message. We
+                % should update them to use the new message per
+                % ddoc.
+                lists:foreach(fun(DDocId) ->
+                    couch_event:notify(Db2#db.name, {ddoc_updated, DDocId})
+                end, UpdatedDDocIds),
                 couch_event:notify(Db2#db.name, ddoc_updated),
                 ddoc_cache:evict(Db2#db.name, UpdatedDDocIds),
                 refresh_validate_doc_funs(Db2);
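
A consumer of these notifications now sees both the legacy event and the
new per-ddoc event; a hedged sketch of the matching clauses (the handler
and helper names here are made up, not part of the patch):

    handle_db_event(DbName, {ddoc_updated, DDocId}, St) ->
        %% new form: carries the id of the design doc that changed
        invalidate_ddoc(DbName, DDocId),
        {ok, St};
    handle_db_event(DbName, ddoc_updated, St) ->
        %% legacy form, still emitted for Ken and ddoc_cache
        invalidate_all(DbName),
        {ok, St};
    handle_db_event(_DbName, _Event, St) ->
        {ok, St}.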


[09/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Fix multipart PUTs for document attachments

We accidentally dropped some of the multipart attachment handling for
clusters when couch_att.erl was merged.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/28fdeb4f
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/28fdeb4f
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/28fdeb4f

Branch: refs/heads/windsor-merge
Commit: 28fdeb4f932f9e0f0fae59dbe89ef7e236e6e143
Parents: c484520
Author: Paul J. Davis <pa...@gmail.com>
Authored: Sun Aug 10 17:32:52 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:07 2014 +0100

----------------------------------------------------------------------
 src/couch_att.erl | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/28fdeb4f/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index e011402..720d435 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -527,6 +527,24 @@ flush_data(Fd, Fun, Att) when is_function(Fun) ->
             couch_db:with_stream(Fd, Att, fun(OutputStream) ->
                 write_streamed_attachment(OutputStream, Fun, AttLen)
             end)
+    end;
+flush_data(Fd, {follows, Parser, Ref}, Att) ->
+    ParserRef = erlang:monitor(process, Parser),
+    Fun = fun() ->
+        Parser ! {get_bytes, Ref, self()},
+        receive
+            {started_open_doc_revs, NewRef} ->
+                couch_doc:restart_open_doc_revs(Parser, Ref, NewRef);
+            {bytes, Ref, Bytes} ->
+                Bytes;
+            {'DOWN', ParserRef, _, _, Reason} ->
+                throw({mp_parser_died, Reason})
+        end
+    end,
+    try
+        flush_data(Fd, Fun, store(data, Fun, Att))
+    after
+        erlang:demonitor(ParserRef, [flush])
     end.
 
 
@@ -564,7 +582,25 @@ foldl({Fd, Sp}, Att, Fun, Acc) ->
     couch_stream:foldl(Fd, Sp, Md5, Fun, Acc);
 foldl(DataFun, Att, Fun, Acc) when is_function(DataFun) ->
     Len = fetch(att_len, Att),
-    fold_streamed_data(DataFun, Len, Fun, Acc).
+    fold_streamed_data(DataFun, Len, Fun, Acc);
+foldl({follows, Parser, Ref}, Att, Fun, Acc) ->
+    ParserRef = erlang:monitor(process, Parser),
+    DataFun = fun() ->
+        Parser ! {get_bytes, Ref, self()},
+        receive
+            {started_open_doc_revs, NewRef} ->
+                couch_doc:restart_open_doc_revs(Parser, Ref, NewRef);
+            {bytes, Ref, Bytes} ->
+                Bytes;
+            {'DOWN', ParserRef, _, _, Reason} ->
+                throw({mp_parser_died, Reason})
+        end
+    end,
+    try
+        foldl(DataFun, store(data, DataFun, Att), Fun, Acc)
+    after
+        erlang:demonitor(ParserRef, [flush])
+    end.
 
 
 range_foldl(Att, From, To, Fun, Acc) ->
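
Both restored clauses speak the same small message protocol with the
multipart parser; a hedged sketch of the parser side (the loop and its
chunk queue are made up for illustration):

    parser_loop(Ref, Chunks) ->
        receive
            {get_bytes, Ref, From} ->
                %% hand the next chunk to whichever worker asked for it
                [Next | Rest] = Chunks,
                From ! {bytes, Ref, Next},
                parser_loop(Ref, Rest)
        end.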


[06/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Add couch_db_header:latest/1

This function returns false if it is supplied an integer less than
?LATEST_DISK_VERSION, true if supplied ?LATEST_DISK_VERSION, and
undefined on any other input.

BugzID: 15762


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/5a2da109
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/5a2da109
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/5a2da109

Branch: refs/heads/windsor-merge
Commit: 5a2da109272d26767cea9bc695b6e34161d6f783
Parents: 50b5aab
Author: Adam Kocoloski <ad...@cloudant.com>
Authored: Thu Jun 26 16:48:46 2014 -0400
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_db_header.erl | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/5a2da109/src/couch_db_header.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_header.erl b/src/couch_db_header.erl
index 03f95ea..2d48434 100644
--- a/src/couch_db_header.erl
+++ b/src/couch_db_header.erl
@@ -26,6 +26,7 @@
     update_seq/1,
     id_tree_state/1,
     seq_tree_state/1,
+    latest/1,
     local_tree_state/1,
     purge_seq/1,
     purged_docs/1,
@@ -283,6 +284,12 @@ upgrade_compacted_seq(#db_header{}=Header) ->
             Header
     end.
 
+latest(?LATEST_DISK_VERSION) ->
+    true;
+latest(N) when is_integer(N), N < ?LATEST_DISK_VERSION ->
+    false;
+latest(_Else) ->
+    undefined.
 
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").


[11/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Garbage collect OS process Pids

The amount of data flowing through these pids to their ports can cause
a significant amount of garbage to accumulate. This change simply reacts
to an occasional garbage-collection request from the proc manager.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/245a683e
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/245a683e
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/245a683e

Branch: refs/heads/windsor-merge
Commit: 245a683e9c38caa1f303eac213dfc71d6adf9ac4
Parents: b72ae14
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 12:55:28 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_os_process.erl | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/245a683e/src/couch_os_process.erl
----------------------------------------------------------------------
diff --git a/src/couch_os_process.erl b/src/couch_os_process.erl
index 676d540..8aaf5db 100644
--- a/src/couch_os_process.erl
+++ b/src/couch_os_process.erl
@@ -207,6 +207,9 @@ handle_cast({send, Data}, #os_proc{writer=Writer, idle=Idle}=OsProc) ->
             ?LOG_ERROR("Failed sending data: ~p -> ~p", [Data, OsError]),
             {stop, normal, OsProc}
     end;
+handle_cast(garbage_collect, #os_proc{idle=Idle}=OsProc) ->
+    erlang:garbage_collect(),
+    {noreply, OsProc, Idle};
 handle_cast(stop, OsProc) ->
     {stop, normal, OsProc};
 handle_cast(Msg, #os_proc{idle=Idle}=OsProc) ->
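
With the new clause in place, the proc manager can ask any OS process to
collect its garbage with a plain fire-and-forget cast, for example:

    %% OsProcPid is the pid of a couch_os_process gen_server
    gen_server:cast(OsProcPid, garbage_collect).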


[15/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Remove old upgrade code.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/9009e5fc
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/9009e5fc
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/9009e5fc

Branch: refs/heads/windsor-merge
Commit: 9009e5fc879f77474fb48aaf07a20f02b19bf665
Parents: be3b2e9
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 12:57:12 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_proc_manager.erl | 5 -----
 1 file changed, 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9009e5fc/src/couch_proc_manager.erl
----------------------------------------------------------------------
diff --git a/src/couch_proc_manager.erl b/src/couch_proc_manager.erl
index 3f34a64..541686c 100644
--- a/src/couch_proc_manager.erl
+++ b/src/couch_proc_manager.erl
@@ -425,11 +425,6 @@ export_proc(#proc_int{} = ProcInt) ->
     [_ | Data] = lists:sublist(tuple_to_list(ProcInt), record_info(size, proc)),
     list_to_tuple([proc | Data]).
 
-import_proc(#proc{} = P) ->
-    lists:foldl(fun(Idx, ProcInt) ->
-        setelement(Idx, ProcInt, element(Idx, P))
-    end, #proc_int{}, lists:seq(2, tuple_size(P))).
-
 maybe_spawn_proc(State, Client) ->
     #state{proc_counts=Counts, waiting=Waiting} = State,
     #client{lang=Lang} = Client,


[24/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Use an API for setting the mp_att_writers


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/94a2474a
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/94a2474a
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/94a2474a

Branch: refs/heads/windsor-merge
Commit: 94a2474a4ae9cf8e6cce2511ccd26ab343fdeea8
Parents: 4f9cb35
Author: Paul J. Davis <pa...@gmail.com>
Authored: Fri Aug 22 02:13:05 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_doc.erl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/94a2474a/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index 74450a4..d92412d 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -447,7 +447,7 @@ doc_from_multi_part_stream(ContentType, DataFun, Ref) ->
     {Parser, ParserRef} = spawn_monitor(fun() ->
         ParentRef = erlang:monitor(process, Parent),
         put(mp_parent_ref, ParentRef),
-        put(num_mp_writers, NumMpWriters),
+        num_mp_writers(NumMpWriters),
         {<<"--",_/binary>>, _, _} = couch_httpd:parse_multipart_request(
             ContentType, DataFun,
             fun(Next) -> mp_parse_doc(Next, []) end),
@@ -594,6 +594,10 @@ maybe_send_data({Ref, Chunks, Offset, Counters, Waiting}) ->
     end.
 
 
+num_mp_writers(N) ->
+    erlang:put(mp_att_writers, N).
+
+
 num_mp_writers() ->
     case erlang:get(mp_att_writers) of
         undefined -> 1;
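
The setter/getter pair keeps the process-dictionary key private to
couch_doc; a hedged usage sketch:

    num_mp_writers(3),    % record the writer count for this process
    3 = num_mp_writers(). % read it back (defaults to 1 when unset)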


[19/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Fix attachment revpos values


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/fdbf505d
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/fdbf505d
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/fdbf505d

Branch: refs/heads/windsor-merge
Commit: fdbf505da78fa425987a4492073a0133467a057b
Parents: 093454d
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 15:43:13 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_att.erl | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/fdbf505d/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index e174c48..f0d2dd1 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -406,8 +406,9 @@ stub_from_json(Att, Props) ->
 follow_from_json(Att, Props) ->
     {DiskLen, EncodedLen, Encoding} = encoded_lengths_from_json(Props),
     Digest = digest_from_json(Props),
+    RevPos = couch_util:get_value(<<"revpos">>, Props, 0),
     store([
-        {md5, Digest}, {data, follows}, {disk_len, DiskLen},
+        {md5, Digest}, {revpos, RevPos}, {data, follows}, {disk_len, DiskLen},
         {att_len, EncodedLen}, {encoding, Encoding}
     ], Att).
 
@@ -416,7 +417,11 @@ inline_from_json(Att, Props) ->
     B64Data = couch_util:get_value(<<"data">>, Props),
     Data = base64:decode(B64Data),
     Length = size(Data),
-    store([{data, Data}, {disk_len, Length}, {att_len, Length}], Att).
+    RevPos = couch_util:get_value(<<"revpos">>, Props, 0),
+    store([
+        {data, Data}, {revpos, RevPos}, {disk_len, Length},
+        {att_len, Length}
+    ], Att).
 
 
 encoded_lengths_from_json(Props) ->


[17/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Fix couch_ejson_compare:less/2 for complex values

The equality comparisons in the functions used by
couch_ejson_compare:less/2 were never updated to return 0 for equal
values, as they were always assumed to return pure ordering information.
This fixes the missing cases for both arrays and objects.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/9573e90e
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/9573e90e
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/9573e90e

Branch: refs/heads/windsor-merge
Commit: 9573e90e09dad737e9d739780821ee01b7bbb0bd
Parents: f4c988a
Author: Paul J. Davis <pa...@gmail.com>
Authored: Sun Aug 17 12:49:09 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_ejson_compare.erl | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9573e90e/src/couch_ejson_compare.erl
----------------------------------------------------------------------
diff --git a/src/couch_ejson_compare.erl b/src/couch_ejson_compare.erl
index a1c5b21..81adbb8 100644
--- a/src/couch_ejson_compare.erl
+++ b/src/couch_ejson_compare.erl
@@ -73,6 +73,8 @@ atom_sort(null) -> 1;
 atom_sort(false) -> 2;
 atom_sort(true) -> 3.
 
+less_props([], []) ->
+    0;
 less_props([], [_|_]) ->
     -1;
 less_props(_, []) ->
@@ -90,6 +92,8 @@ less_props([{AKey, AValue}|RestA], [{BKey, BValue}|RestB]) ->
         Result
     end.
 
+less_list([], []) ->
+    0;
 less_list([], [_|_]) ->
     -1;
 less_list(_, []) ->
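
With the equal cases in place, identical arrays and objects compare as
equal all the way up the structure; a hedged sketch of the expected
results (less/2 returns a negative integer, zero, or a positive integer):

    0 = couch_ejson_compare:less([1, 2], [1, 2]),
    0 = couch_ejson_compare:less({[{<<"a">>, 1}]}, {[{<<"a">>, 1}]}),
    true = couch_ejson_compare:less([1], [1, 2]) < 0.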


[14/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Silence warnings for dict type


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/14c384ff
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/14c384ff
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/14c384ff

Branch: refs/heads/windsor-merge
Commit: 14c384ff58ce3a8ade9c6a73fab2fe678173a823
Parents: 28fdeb4
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 12:29:19 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_att.erl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/14c384ff/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index 720d435..347a8fd 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -50,6 +50,7 @@
     downgrade/1
 ]).
 
+-compile(nowarn_deprecated_type).
 -export_type([att/0]).
 
 -include_lib("couch/include/couch_db.hrl").
@@ -247,7 +248,7 @@ merge_stubs(MemAtts, DiskAtts) ->
     merge_stubs(MemAtts, OnDisk, []).
 
 
--spec merge_stubs([att()], dict:dict(), [att()]) -> [att()].
+-spec merge_stubs([att()], dict(), [att()]) -> [att()].
 merge_stubs([Att | Rest], OnDisk, Merged) ->
     case fetch(data, Att) of
         stub ->


[26/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Move couch stats handler to couch_stats


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/14dc5e93
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/14dc5e93
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/14dc5e93

Branch: refs/heads/windsor-merge
Commit: 14dc5e93d79b438a794f594f5555766c44a9b231
Parents: 0f35575
Author: Robert Newson <rn...@apache.org>
Authored: Sat Aug 23 14:30:22 2014 +0100
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:13 2014 +0100

----------------------------------------------------------------------
 src/couch_httpd_stats_handlers.erl | 53 ---------------------------------
 1 file changed, 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/14dc5e93/src/couch_httpd_stats_handlers.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_stats_handlers.erl b/src/couch_httpd_stats_handlers.erl
deleted file mode 100644
index 88376ab..0000000
--- a/src/couch_httpd_stats_handlers.erl
+++ /dev/null
@@ -1,53 +0,0 @@
-% Licensed under the Apache License, Version 2.0 (the "License"); you may not
-% use this file except in compliance with the License. You may obtain a copy of
-% the License at
-%
-%   http://www.apache.org/licenses/LICENSE-2.0
-%
-% Unless required by applicable law or agreed to in writing, software
-% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-% License for the specific language governing permissions and limitations under
-% the License.
-
--module(couch_httpd_stats_handlers).
--include("couch_db.hrl").
-
--export([handle_stats_req/1]).
-
-handle_stats_req(#httpd{method='GET', path_parts=[_]}=Req) ->
-    Stats = couch_stats:fetch(),
-    Nested = nest(Stats),
-    EJSON = to_ejson(Nested),
-    couch_httpd:send_json(Req, EJSON).
-
-nest(Proplist) ->
-    nest(Proplist, []).
-
-nest([], Acc) ->
-    Acc;
-nest([{[Key|Keys], Value}|Rest], Acc) ->
-    Acc1 = case proplists:lookup(Key, Acc) of
-        {Key, Old} ->
-            [{Key, nest([{Keys, Value}], Old)}|proplists:delete(Key, Acc)];
-        none ->
-            Term = lists:foldr(fun(K, A) -> [{K, A}] end, Value, Keys),
-            [{Key, Term}|Acc]
-    end,
-    nest(Rest, Acc1).
-
-to_ejson([{_, _}|_]=Proplist) ->
-    EJSONProps = lists:map(
-       fun({Key, Value}) -> {maybe_format_key(Key), to_ejson(Value)} end,
-       Proplist
-    ),
-    {EJSONProps};
-to_ejson(NotAProplist) ->
-    NotAProplist.
-
-maybe_format_key(Key) when is_integer(Key) ->
-    maybe_format_key(integer_to_list(Key));
-maybe_format_key(Key) when is_list(Key) ->
-    list_to_binary(Key);
-maybe_format_key(Key) ->
-    Key.


[25/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Merge CouchDB's idea of data_size

This patches dbcore to include the CouchDB notion of data sizes, which
is defined as the size of all live data in a database file. This number
is useful for deciding when to compact databases.

Technically speaking this measure is lacking a bit of information
because it does not currently account for headers, security objects, or
purged document information.

The new sizes are named slightly differently than either the CouchDB
version or the Cloudant version. The new names are:

     file - Total number of bytes in the file
     active - Active bytes in the current MVCC snapshot
     external - Theoretical bytes to store user data uncompressed

BugzId: 27061


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/0f35575b
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/0f35575b
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/0f35575b

Branch: refs/heads/windsor-merge
Commit: 0f35575b949d8c7f335e989f7f63f55a8dbf0af6
Parents: 94a2474
Author: Paul J. Davis <pa...@gmail.com>
Authored: Tue Jan 28 17:43:10 2014 -0600
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:13 2014 +0100

----------------------------------------------------------------------
 include/couch_db.hrl     |  10 +-
 src/couch_att.erl        |  26 ++---
 src/couch_db.erl         |  59 ++++++----
 src/couch_db_updater.erl | 249 ++++++++++++++++++++++++++++--------------
 4 files changed, 220 insertions(+), 124 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/0f35575b/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 8bb44a0..bbdfda9 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -58,12 +58,17 @@
     revs = [] % rev_info
 }).
 
+-record(size_info, {
+    active = 0,
+    external = 0
+}).
+
 -record(full_doc_info, {
     id = <<"">>,
     update_seq = 0,
     deleted = false,
     rev_tree = [],
-    leafs_size = 0
+    sizes = #size_info{}
 }).
 
 -record(httpd, {
@@ -204,6 +209,7 @@
     deleted,
     ptr,
     seq,
-    size = nil
+    sizes = #size_info{},
+    atts = []
 }).
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/0f35575b/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index 232bb60..07ee4da 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -27,7 +27,7 @@
 ]).
 
 -export([
-    disk_info/2,
+    size_info/1,
     to_disk_term/1,
     from_disk_term/2
 ]).
@@ -276,22 +276,14 @@ merge_stubs([], _, Merged) ->
     {ok, lists:reverse(Merged)}.
 
 
-disk_info(_, []) ->
-    {ok, [], []};
-disk_info(ActiveFd, Atts) ->
-    {AttFd, _} = fetch(data, hd(Atts)),
-    if
-        AttFd == ActiveFd ->
-            Tuples = [to_disk_term(Att) || Att <- Atts],
-            Info = lists:map(fun(Att) ->
-                [{_, Pos}, AttLen] = fetch([data, att_len], Att),
-                {Pos, AttLen}
-            end, Atts),
-            {ok, Tuples, Info};
-        true ->
-            ?LOG_ERROR("MISMATCH: ~p ; ~p~n", [ActiveFd, Atts]),
-            file_mismatch
-    end.
+size_info([]) ->
+    {ok, []};
+size_info(Atts) ->
+    Info = lists:map(fun(Att) ->
+        [{_, Pos}, AttLen] = fetch([data, att_len], Att),
+        {Pos, AttLen}
+    end, Atts),
+    {ok, lists:usort(Info)}.
 
 
 %% When converting an attachment to disk term format, attempt to stay with the

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/0f35575b/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index bd250ef..b3663fe 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -321,12 +321,15 @@ get_db_info(Db) ->
         name=Name,
         instance_start_time=StartTime,
         committed_update_seq=CommittedUpdateSeq,
-        id_tree = IdBtree,
-        seq_tree = SeqBtree,
-        local_tree = LocalBtree
+        id_tree = IdBtree
     } = Db,
-    {ok, Size} = couch_file:bytes(Fd),
+    {ok, FileSize} = couch_file:bytes(Fd),
     {ok, DbReduction} = couch_btree:full_reduce(IdBtree),
+    SizeInfo0 = element(3, DbReduction),
+    SizeInfo = if is_record(SizeInfo0, size_info) -> SizeInfo0; true ->
+        #size_info{active=SizeInfo0}
+    end,
+    ActiveSize = active_size(Db, SizeInfo),
     DiskVersion = couch_db_header:disk_version(Header),
     Uuid = case get_uuid(Db) of
         undefined -> null;
@@ -343,8 +346,14 @@ get_db_info(Db) ->
         {update_seq, SeqNum},
         {purge_seq, couch_db:get_purge_seq(Db)},
         {compact_running, Compactor/=nil},
-        {disk_size, Size},
-        {data_size, db_data_size(DbReduction, [SeqBtree, IdBtree, LocalBtree])},
+        {disk_size, FileSize}, % legacy
+        {other, {[{data_size, SizeInfo#size_info.external}]}}, % legacy
+        {data_size, ActiveSize}, % legacy
+        {sizes, {[
+            {file, FileSize},
+            {active, ActiveSize},
+            {external, SizeInfo#size_info.external}
+        ]}},
         {instance_start_time, StartTime},
         {disk_format_version, DiskVersion},
         {committed_update_seq, CommittedUpdateSeq},
@@ -353,23 +362,24 @@ get_db_info(Db) ->
         ],
     {ok, InfoList}.
 
-db_data_size({_Count, _DelCount}, _Trees) ->
-    % pre 1.2 format, upgraded on compaction
-    null;
-db_data_size({_Count, _DelCount, nil}, _Trees) ->
-    null;
-db_data_size({_Count, _DelCount, DocAndAttsSize}, Trees) ->
-    sum_tree_sizes(DocAndAttsSize, Trees).
-
-sum_tree_sizes(Acc, []) ->
-    Acc;
-sum_tree_sizes(Acc, [T | Rest]) ->
-    case couch_btree:size(T) of
-    nil ->
-        null;
-    Sz ->
-        sum_tree_sizes(Acc + Sz, Rest)
-    end.
+active_size(#db{}=Db, Size) when is_integer(Size) ->
+    active_size(Db, #size_info{active=Size});
+active_size(#db{}=Db, #size_info{}=SI) ->
+    Trees = [
+        Db#db.id_tree,
+        Db#db.seq_tree,
+        Db#db.local_tree
+    ],
+    lists:foldl(fun(T, Acc) ->
+        case couch_btree:size(T) of
+            _ when Acc == null ->
+                null;
+            undefined ->
+                null;
+            Size ->
+                Acc + Size
+        end
+    end, SI#size_info.active, Trees).
 
 get_design_docs(#db{name = <<"shards/", _:18/binary, DbName/binary>>}) ->
     {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName)) end),
@@ -1022,6 +1032,7 @@ prepare_doc_summaries(Db, BucketList) ->
     [lists:map(
         fun(#doc{body = Body, atts = Atts} = Doc) ->
             DiskAtts = [couch_att:to_disk_term(Att) || Att <- Atts],
+            {ok, SizeInfo} = couch_att:size_info(Atts),
             AttsFd = case Atts of
             [Att | _] ->
                 {Fd, _} = couch_att:fetch(data, Att),
@@ -1030,7 +1041,7 @@ prepare_doc_summaries(Db, BucketList) ->
                 nil
             end,
             SummaryChunk = couch_db_updater:make_doc_summary(Db, {Body, DiskAtts}),
-            Doc#doc{body = {summary, SummaryChunk, AttsFd}}
+            Doc#doc{body = {summary, SummaryChunk, SizeInfo, AttsFd}}
         end,
         Bucket) || Bucket <- BucketList].
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/0f35575b/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index cd434df..d18463b 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -373,40 +373,79 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
     end.
 
 rev_tree(DiskTree) ->
-    couch_key_tree:mapfold(fun
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
-            % pre 1.2 format, will be upgraded on compaction
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, nil};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, Acc};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
-            Acc2 = sum_leaf_sizes(Acc, Size),
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc2};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc};
-        (_RevId, ?REV_MISSING, _Type, Acc) ->
-            {?REV_MISSING, Acc}
-    end, 0, DiskTree).
+    couch_key_tree:map(fun
+        (_RevId, {Del, Ptr, Seq}) ->
+            #leaf{
+                deleted = ?i2b(Del),
+                ptr = Ptr,
+                seq = Seq
+            };
+        (_RevId, {Del, Ptr, Seq, Size}) ->
+            #leaf{
+                deleted = ?i2b(Del),
+                ptr = Ptr,
+                seq = Seq,
+                sizes = upgrade_sizes(Size)
+            };
+        (_RevId, {Del, Ptr, Seq, Sizes, Atts}) ->
+            #leaf{
+                deleted = ?i2b(Del),
+                ptr = Ptr,
+                seq = Seq,
+                sizes = upgrade_sizes(Sizes),
+                atts = Atts
+            };
+        (_RevId, ?REV_MISSING) ->
+            ?REV_MISSING
+    end, DiskTree).
 
 disk_tree(RevTree) ->
     couch_key_tree:map(fun
         (_RevId, ?REV_MISSING) ->
             ?REV_MISSING;
-        (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, size=Size}) ->
-            {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
+        (_RevId, #leaf{} = Leaf) ->
+            #leaf{
+                deleted = Del,
+                ptr = Ptr,
+                seq = Seq,
+                sizes = Sizes,
+                atts = Atts
+            } = Leaf,
+            {?b2i(Del), Ptr, Seq, split_sizes(Sizes), Atts}
     end, RevTree).
 
-btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) ->
-    {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
+upgrade_sizes(#size_info{}=SI) ->
+    SI;
+upgrade_sizes({D, E}) ->
+    #size_info{active=D, external=E};
+upgrade_sizes(S) when is_integer(S) ->
+    #size_info{active=S, external=0}.
+
+split_sizes(#size_info{}=SI) ->
+    {SI#size_info.active, SI#size_info.external}.
+
+join_sizes({Active, External}) when is_integer(Active), is_integer(External) ->
+    #size_info{active=Active, external=External}.
+
+btree_by_seq_split(#full_doc_info{}=Info) ->
+    #full_doc_info{
+        id = Id,
+        update_seq = Seq,
+        deleted = Del,
+        sizes = SizeInfo,
+        rev_tree = Tree
+    } = Info,
+    {Seq, {Id, ?b2i(Del), split_sizes(SizeInfo), disk_tree(Tree)}}.
 
 btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
-    {RevTree, LeafsSize} = rev_tree(DiskTree),
+    btree_by_seq_join(Seq, {Id, Del, {0, 0}, DiskTree});
+btree_by_seq_join(Seq, {Id, Del, Sizes, DiskTree}) when is_integer(Del) ->
     #full_doc_info{
         id = Id,
         update_seq = Seq,
         deleted = ?i2b(Del),
-        rev_tree = RevTree,
-        leafs_size = LeafsSize
+        sizes = join_sizes(Sizes),
+        rev_tree = rev_tree(DiskTree)
     };
 btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
     % Older versions stored #doc_info records in the seq_tree.
@@ -420,49 +459,63 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
             [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
                 {Rev, Seq, Bp} <- DeletedRevInfos]}.
 
-btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
-        deleted=Deleted, rev_tree=Tree}) ->
-    {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
+btree_by_id_split(#full_doc_info{}=Info) ->
+    #full_doc_info{
+        id = Id,
+        update_seq = Seq,
+        deleted = Deleted,
+        sizes = SizeInfo,
+        rev_tree = Tree
+    } = Info,
+    {Id, {Seq, ?b2i(Deleted), split_sizes(SizeInfo), disk_tree(Tree)}}.
 
+% Handle old formats before data_size was added
 btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
-    {Tree, LeafsSize} = rev_tree(DiskTree),
+    btree_by_id_join(Id, {HighSeq, Deleted, #size_info{}, DiskTree});
+
+btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) ->
     #full_doc_info{
         id = Id,
         update_seq = HighSeq,
         deleted = ?i2b(Deleted),
-        rev_tree = Tree,
-        leafs_size = LeafsSize
+        sizes = upgrade_sizes(Sizes),
+        rev_tree = rev_tree(DiskTree)
     }.
 
 btree_by_id_reduce(reduce, FullDocInfos) ->
     lists:foldl(
-        fun(Info, {NotDeleted, Deleted, Size}) ->
-            Size2 = sum_leaf_sizes(Size, Info#full_doc_info.leafs_size),
+        fun(Info, {NotDeleted, Deleted, Sizes}) ->
+            Sizes2 = reduce_sizes(Sizes, Info#full_doc_info.sizes),
             case Info#full_doc_info.deleted of
             true ->
-                {NotDeleted, Deleted + 1, Size2};
+                {NotDeleted, Deleted + 1, Sizes2};
             false ->
-                {NotDeleted + 1, Deleted, Size2}
+                {NotDeleted + 1, Deleted, Sizes2}
             end
         end,
-        {0, 0, 0}, FullDocInfos);
+        {0, 0, #size_info{}}, FullDocInfos);
 btree_by_id_reduce(rereduce, Reds) ->
     lists:foldl(
-        fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSize}) ->
+        fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSizes}) ->
             % pre 1.2 format, will be upgraded on compaction
             {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil};
-        ({NotDeleted, Deleted, Size}, {AccNotDeleted, AccDeleted, AccSize}) ->
-            AccSize2 = sum_leaf_sizes(AccSize, Size),
-            {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSize2}
+        ({NotDeleted, Deleted, Sizes}, {AccNotDeleted, AccDeleted, AccSizes}) ->
+            AccSizes2 = reduce_sizes(AccSizes, Sizes),
+            {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSizes2}
         end,
-        {0, 0, 0}, Reds).
+        {0, 0, #size_info{}}, Reds).
 
-sum_leaf_sizes(nil, _) ->
+reduce_sizes(nil, _) ->
     nil;
-sum_leaf_sizes(_, nil) ->
+reduce_sizes(_, nil) ->
     nil;
-sum_leaf_sizes(Size1, Size2) ->
-    Size1 + Size2.
+reduce_sizes(S1, S2) when is_integer(S1); is_integer(S2) ->
+    reduce_sizes(upgrade_sizes(S1), upgrade_sizes(S2));
+reduce_sizes(#size_info{}=S1, #size_info{}=S2) ->
+    #size_info{
+        active = S1#size_info.active + S2#size_info.active,
+        external = S1#size_info.external + S2#size_info.external
+    }.
 
 btree_by_seq_reduce(reduce, DocInfos) ->
     % count the number of documents
@@ -574,10 +627,11 @@ flush_trees(_Db, [], AccFlushedTrees) ->
 flush_trees(#db{fd = Fd} = Db,
         [InfoUnflushed | RestUnflushed], AccFlushed) ->
     #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed,
-    {Flushed, LeafsSize} = couch_key_tree:mapfold(
-        fun(_Rev, Value, Type, Acc) ->
+    {Flushed, FinalAcc} = couch_key_tree:mapfold(
+        fun(_Rev, Value, Type, SizesAcc) ->
             case Value of
-            #doc{deleted = IsDeleted, body = {summary, Summary, AttsFd}} ->
+            #doc{deleted = IsDeleted, body = {summary, _, _, _} = DocSummary} ->
+                {summary, Summary, AttSizeInfo, AttsFd} = DocSummary,
                 % this node value is actually an unwritten document summary,
                 % write to disk.
                 % make sure the Fd in the written bins is the same Fd we are
@@ -596,31 +650,48 @@ flush_trees(#db{fd = Fd} = Db,
                             " changed. Possibly retrying.", []),
                     throw(retry)
                 end,
+                ExternalSize = ?term_size(Summary),
                 {ok, NewSummaryPointer, SummarySize} =
                     couch_file:append_raw_chunk(Fd, Summary),
-                TotalSize = lists:foldl(
-                    fun(Att, A) -> A + couch_att:fetch(att_len, Att) end,
-                    SummarySize, Value#doc.atts),
-                NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
-                                 seq=UpdateSeq, size=TotalSize},
-                case Type of
-                leaf ->
-                    {NewValue, Acc + TotalSize};
-                branch ->
-                    {NewValue, Acc}
-                end;
-             {_, _, _, LeafSize} when Type =:= leaf, LeafSize =/= nil ->
-                {Value, Acc + LeafSize};
-             _ ->
-                {Value, Acc}
+                Leaf = #leaf{
+                    deleted = IsDeleted,
+                    ptr = NewSummaryPointer,
+                    seq = UpdateSeq,
+                    sizes = #size_info{
+                        active = SummarySize,
+                        external = ExternalSize
+                    },
+                    atts = AttSizeInfo
+                },
+                {Leaf, add_sizes(Type, Leaf, SizesAcc)};
+            #leaf{} ->
+                {Value, add_sizes(Type, Value, SizesAcc)};
+            _ ->
+                {Value, SizesAcc}
             end
-        end, 0, Unflushed),
-    InfoFlushed = InfoUnflushed#full_doc_info{
+        end, {0, 0, []}, Unflushed),
+    {FinalAS, FinalES, FinalAtts} = FinalAcc,
+    TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
+    NewInfo = InfoUnflushed#full_doc_info{
         rev_tree = Flushed,
-        leafs_size = LeafsSize
+        sizes = #size_info{
+            active = FinalAS + TotalAttSize,
+            external = FinalES + TotalAttSize
+        }
     },
-    flush_trees(Db, RestUnflushed, [InfoFlushed | AccFlushed]).
-
+    flush_trees(Db, RestUnflushed, [NewInfo | AccFlushed]).
+
+add_sizes(Type, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
+    % Maybe upgrade from disk_size only
+    #size_info{
+        active = ActiveSize,
+        external = ExternalSize
+    } = upgrade_sizes(Sizes),
+    {ASAcc, ESAcc, AttsAcc} = Acc,
+    NewASAcc = ActiveSize + ASAcc,
+    NewESAcc = ESAcc + if Type == leaf -> ExternalSize; true -> 0 end,
+    NewAttsAcc = lists:umerge(AttSizes, AttsAcc),
+    {NewASAcc, NewESAcc, NewAttsAcc}.
 
 send_result(Client, Doc, NewResult) ->
     % used to send a result to the client
@@ -966,23 +1037,39 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
         A =< B
     end, merge_lookups(MixedInfos, LookupResults)),
 
-    NewInfos1 = lists:map(
-        fun(#full_doc_info{rev_tree=RevTree}=Info) ->
-            Info#full_doc_info{rev_tree=couch_key_tree:map(
-                fun(_, _, branch) ->
-                    ?REV_MISSING;
-                (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
-                    {_Body, AttsInfo} = Summary = copy_doc_attachments(
-                        Db, Sp, DestFd),
-                    SummaryChunk = make_doc_summary(NewDb, Summary),
-                    {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
-                        DestFd, SummaryChunk),
-                    TotalLeafSize = lists:foldl(
-                        fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
-                        SummarySize, AttsInfo),
-                    Leaf#leaf{ptr=Pos, size=TotalLeafSize}
-                end, RevTree)}
-        end, NewInfos0),
+    NewInfos1 = lists:map(fun(Info) ->
+        {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
+            (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
+                {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
+                SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
+                ExternalSize = ?term_size(SummaryChunk),
+                {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
+                    DestFd, SummaryChunk),
+                AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
+                NewLeaf = Leaf#leaf{
+                    ptr = Pos,
+                    sizes = #size_info{
+                        active = SummarySize,
+                        external = ExternalSize
+                    },
+                    atts = AttSizes
+                },
+                {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)};
+            (_Rev, _Leaf, branch, SizesAcc) ->
+                {?REV_MISSING, SizesAcc}
+        end, {0, 0, []}, Info#full_doc_info.rev_tree),
+        {FinalAS, FinalES, FinalAtts} = FinalAcc,
+        TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
+        NewActiveSize = FinalAS + TotalAttSize,
+        NewExternalSize = FinalES + TotalAttSize,
+        Info#full_doc_info{
+            rev_tree = NewRevTree,
+            sizes = #size_info{
+                active = NewActiveSize,
+                external = NewExternalSize
+            }
+        }
+    end, NewInfos0),
 
     NewInfos = stem_full_doc_infos(Db, NewInfos1),
     RemoveSeqs =
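
upgrade_sizes/1 accepts all three historical on-disk encodings, so old
leaves upgrade transparently; for example:

    #size_info{active = 5, external = 0} = upgrade_sizes(5),
    #size_info{active = 5, external = 9} = upgrade_sizes({5, 9}),
    {5, 9} = split_sizes(#size_info{active = 5, external = 9}).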


[22/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Remove duplicate couch_replicator processes

These are started by the couch_replicator application now.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/093454d4
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/093454d4
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/093454d4

Branch: refs/heads/windsor-merge
Commit: 093454d432ec66c397366d285f8ef12603369566
Parents: 3755bb6
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 15:02:33 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_primary_sup.erl | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/093454d4/src/couch_primary_sup.erl
----------------------------------------------------------------------
diff --git a/src/couch_primary_sup.erl b/src/couch_primary_sup.erl
index cbd576d..dc2d9e5 100644
--- a/src/couch_primary_sup.erl
+++ b/src/couch_primary_sup.erl
@@ -36,19 +36,7 @@ init([]) ->
             permanent,
             brutal_kill,
             worker,
-            [couch_server]},
-        {couch_replication_event,
-            {gen_event, start_link, [{local, couch_replication}]},
-            permanent,
-            brutal_kill,
-            worker,
-            dynamic},
-        {couch_replicator_job_sup,
-            {couch_replicator_job_sup, start_link, []},
-            permanent,
-            infinity,
-            supervisor,
-            [couch_replicator_job_sup]}
+            [couch_server]}
     ],
     {ok, {{one_for_one, 10, 3600}, Children}}.
 


[07/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Amend a too-restrictive pattern match

We were asserting that the subpath contains exactly one element, but in
reality the subpath can be empty (which will result in an immediate
failure to merge on the next recursion).

BugzID: 30686


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/50b5aab1
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/50b5aab1
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/50b5aab1

Branch: refs/heads/windsor-merge
Commit: 50b5aab12bdbd4a54d2fc530330e062d08c7f1c5
Parents: e32f166
Author: Adam Kocoloski <ad...@cloudant.com>
Authored: Wed May 7 11:27:47 2014 -0400
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_key_tree.erl | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/50b5aab1/src/couch_key_tree.erl
----------------------------------------------------------------------
diff --git a/src/couch_key_tree.erl b/src/couch_key_tree.erl
index 57c08a0..8007e17 100644
--- a/src/couch_key_tree.erl
+++ b/src/couch_key_tree.erl
@@ -139,6 +139,10 @@ merge_at(Nodes, Pos, [{IK, IV, [NextINode]}]) when Pos > 0 ->
         {Merged, Result} -> {[{IK, IV, Merged}], Result};
         fail -> fail
     end;
+merge_at(_Nodes, Pos, [{_IK, _IV, []}]) when Pos > 0 ->
+    % We've run out of path on the insert side, there's no way we can
+    % merge with this branch
+    fail;
 merge_at([{K, V, SubTree} | Sibs], Pos, INodes) when Pos < 0 ->
     % When Pos is negative, Depth was less than IDepth, so we
     % need to discard from the revision tree path


[08/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Make rev tree stemming for interactive updates optional

Documents with many conflicts and/or tombstones cause significant
performance degradation in couch_db_updater due to the stemming of rev
trees. This commit mitigates the problem by making rev tree stemming on
interactive updates optional. As a consequence, rev trees can
temporarily grow longer than revs_limit. When interactive stemming is
disabled, the burden of rev tree stemming falls on replication and
compaction.

BugzID: 30438


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/e32f1666
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/e32f1666
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/e32f1666

Branch: refs/heads/windsor-merge
Commit: e32f1666527589f0f1cf0113d35a9a4810f4144f
Parents: dfe0e65
Author: Benjamin Bastian <be...@gmail.com>
Authored: Tue Apr 29 13:21:34 2014 -0700
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_key_tree.erl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/e32f1666/src/couch_key_tree.erl
----------------------------------------------------------------------
diff --git a/src/couch_key_tree.erl b/src/couch_key_tree.erl
index 9ad6f7f..57c08a0 100644
--- a/src/couch_key_tree.erl
+++ b/src/couch_key_tree.erl
@@ -76,7 +76,10 @@ stem/2
                 {revtree(), new_leaf | new_branch | internal_node}.
 merge(RevTree, Tree, StemDepth) ->
     {Merged, Result} = merge(RevTree, Tree),
-    {stem(Merged, StemDepth), Result}.
+    case config:get("couchdb", "stem_interactive_updates", "true") of
+        "true" -> {stem(Merged, StemDepth), Result};
+        _ -> {Merged, Result}
+    end.
 
 
 %% @doc Merge a path into a tree.
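
As a usage note, assuming the standard config application API
(config:set/3 is not part of this diff), an operator could defer
interactive stemming at runtime with:

    config:set("couchdb", "stem_interactive_updates", "false").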


[10/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Remove debug logging


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/f4c988a5
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/f4c988a5
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/f4c988a5

Branch: refs/heads/windsor-merge
Commit: f4c988a55edc122dc04318537620f8310b456e0f
Parents: 7da0801
Author: Paul J. Davis <pa...@gmail.com>
Authored: Sat Aug 16 21:27:54 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_compaction_daemon.erl | 2 --
 1 file changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f4c988a5/src/couch_compaction_daemon.erl
----------------------------------------------------------------------
diff --git a/src/couch_compaction_daemon.erl b/src/couch_compaction_daemon.erl
index 3251d5f..53145b7 100644
--- a/src/couch_compaction_daemon.erl
+++ b/src/couch_compaction_daemon.erl
@@ -142,8 +142,6 @@ compact_loop(Parent) ->
 
 
 maybe_compact_db(DbName, Config) ->
-    etap:diag("~n~n~n~n################~nCOMPACTING: ~p~n#############~n~n",
-        [DbName]),
     case (catch couch_db:open_int(DbName, [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}])) of
     {ok, Db} ->
         DDocNames = db_ddoc_names(Db),


[23/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Use the new couch_stats application


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/f1013bfa
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/f1013bfa
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/f1013bfa

Branch: refs/heads/windsor-merge
Commit: f1013bfa222469856ce5e877407af9eeee49051f
Parents: 9573e90
Author: Paul J. Davis <pa...@gmail.com>
Authored: Thu Aug 21 01:05:11 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 priv/stat_descriptions.cfg         | 194 ++++++++++++++++----
 src/couch.app.src                  |  24 ++-
 src/couch_auth_cache.erl           |   6 +-
 src/couch_db.erl                   |  29 ++-
 src/couch_db_updater.erl           |  11 ++
 src/couch_file.erl                 |   4 +-
 src/couch_httpd.erl                |  14 +-
 src/couch_httpd_cors.erl           |   2 +-
 src/couch_httpd_db.erl             |  14 +-
 src/couch_httpd_stats_handlers.erl |  75 ++++----
 src/couch_lru.erl                  |   1 +
 src/couch_query_servers.erl        |   1 +
 src/couch_server.erl               |   6 +-
 src/couch_stats_aggregator.erl     | 312 --------------------------------
 src/couch_stats_collector.erl      | 133 --------------
 15 files changed, 268 insertions(+), 558 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/priv/stat_descriptions.cfg
----------------------------------------------------------------------
diff --git a/priv/stat_descriptions.cfg b/priv/stat_descriptions.cfg
index b80d768..d6e1586 100644
--- a/priv/stat_descriptions.cfg
+++ b/priv/stat_descriptions.cfg
@@ -14,37 +14,163 @@
 % a trailing full-stop / period
 % Please keep this in alphabetical order
 
-{couchdb, database_writes, "number of times a database was changed"}.
-{couchdb, database_reads, "number of times a document was read from a database"}.
-{couchdb, open_databases, "number of open databases"}.
-{couchdb, open_os_files, "number of file descriptors CouchDB has open"}.
-{couchdb, request_time, "length of a request inside CouchDB without MochiWeb"}.
-{couchdb, auth_cache_hits, "number of authentication cache hits"}.
-{couchdb, auth_cache_misses, "number of authentication cache misses"}.
-
-{httpd, bulk_requests, "number of bulk requests"}.
-{httpd, requests, "number of HTTP requests"}.
-{httpd, temporary_view_reads, "number of temporary view reads"}.
-{httpd, view_reads, "number of view reads"}.
-{httpd, clients_requesting_changes, "number of clients for continuous _changes"}.
-
-{httpd_request_methods, 'COPY', "number of HTTP COPY requests"}.
-{httpd_request_methods, 'DELETE', "number of HTTP DELETE requests"}.
-{httpd_request_methods, 'GET', "number of HTTP GET requests"}.
-{httpd_request_methods, 'HEAD', "number of HTTP HEAD requests"}.
-{httpd_request_methods, 'POST', "number of HTTP POST requests"}.
-{httpd_request_methods, 'PUT', "number of HTTP PUT requests"}.
-
-{httpd_status_codes, '200', "number of HTTP 200 OK responses"}.
-{httpd_status_codes, '201', "number of HTTP 201 Created responses"}.
-{httpd_status_codes, '202', "number of HTTP 202 Accepted responses"}.
-{httpd_status_codes, '301', "number of HTTP 301 Moved Permanently responses"}.
-{httpd_status_codes, '304', "number of HTTP 304 Not Modified responses"}.
-{httpd_status_codes, '400', "number of HTTP 400 Bad Request responses"}.
-{httpd_status_codes, '401', "number of HTTP 401 Unauthorized responses"}.
-{httpd_status_codes, '403', "number of HTTP 403 Forbidden responses"}.
-{httpd_status_codes, '404', "number of HTTP 404 Not Found responses"}.
-{httpd_status_codes, '405', "number of HTTP 405 Method Not Allowed responses"}.
-{httpd_status_codes, '409', "number of HTTP 409 Conflict responses"}.
-{httpd_status_codes, '412', "number of HTTP 412 Precondition Failed responses"}.
-{httpd_status_codes, '500', "number of HTTP 500 Internal Server Error responses"}.
+{[couchdb, auth_cache_hits], [
+    {type, counter},
+    {desc, <<"number of authentication cache hits">>}
+]}.
+{[couchdb, auth_cache_misses], [
+    {type, counter},
+    {desc, <<"number of authentication cache misses">>}
+]}.
+{[couchdb, collect_results_time], [
+    {type, histogram},
+    {desc, <<"microsecond latency for calls to couch_db:collect_results/3">>}
+]}.
+{[couchdb, database_writes], [
+    {type, counter},
+    {desc, <<"number of times a database was changed">>}
+]}.
+{[couchdb, database_reads], [
+    {type, counter},
+    {desc, <<"number of times a document was read from a database">>}
+]}.
+{[couchdb, db_open_time], [
+    {type, histogram},
+    {desc, <<"milliseconds required to open a database">>}
+]}.
+{[couchdb, document_inserts], [
+    {type, counter},
+    {desc, <<"number of documents inserted">>}
+]}.
+{[couchdb, document_writes], [
+    {type, counter},
+    {desc, <<"number of document write operations">>}
+]}.
+{[couchdb, local_document_writes], [
+    {type, counter},
+    {desc, <<"number of _local document write operations">>}
+]}.
+{[couchdb, httpd, bulk_requests], [
+    {type, counter},
+    {desc, <<"number of bulk requests">>}
+]}.
+{[couchdb, httpd, requests], [
+    {type, counter},
+    {desc, <<"number of HTTP requests">>}
+]}.
+{[couchdb, httpd, temporary_view_reads], [
+    {type, counter},
+    {desc, <<"number of temporary view reads">>}
+]}.
+{[couchdb, httpd, view_reads], [
+    {type, counter},
+    {desc, <<"number of view reads">>}
+]}.
+{[couchdb, httpd, clients_requesting_changes], [
+    {type, counter},
+    {desc, <<"number of clients for continuous _changes">>}
+]}.
+{[couchdb, httpd_request_methods, 'COPY'], [
+    {type, counter},
+    {desc, <<"number of HTTP COPY requests">>}
+]}.
+{[couchdb, httpd_request_methods, 'DELETE'], [
+    {type, counter},
+    {desc, <<"number of HTTP DELETE requests">>}
+]}.
+{[couchdb, httpd_request_methods, 'GET'], [
+    {type, counter},
+    {desc, <<"number of HTTP GET requests">>}
+]}.
+{[couchdb, httpd_request_methods, 'HEAD'], [
+    {type, counter},
+    {desc, <<"number of HTTP HEAD requests">>}
+]}.
+{[couchdb, httpd_request_methods, 'POST'], [
+    {type, counter},
+    {desc, <<"number of HTTP POST requests">>}
+]}.
+{[couchdb, httpd_request_methods, 'PUT'], [
+    {type, counter},
+    {desc, <<"number of HTTP PUT requests">>}
+]}.
+{[couchdb, httpd_status_codes, '200'], [
+    {type, counter},
+    {desc, <<"number of HTTP 200 OK responses">>}
+]}.
+{[couchdb, httpd_status_codes, '201'], [
+    {type, counter},
+    {desc, <<"number of HTTP 201 Created responses">>}
+]}.
+{[couchdb, httpd_status_codes, '202'], [
+    {type, counter},
+    {desc, <<"number of HTTP 202 Accepted responses">>}
+]}.
+{[couchdb, httpd_status_codes, '301'], [
+    {type, counter},
+    {desc, <<"number of HTTP 301 Moved Permanently responses">>}
+]}.
+{[couchdb, httpd_status_codes, '302'], [
+    {type, counter},
+    {desc, <<"number of HTTP 302 Found responses">>}
+]}.
+{[couchdb, httpd_status_codes, '304'], [
+    {type, counter},
+    {desc, <<"number of HTTP 304 Not Modified responses">>}
+]}.
+{[couchdb, httpd_status_codes, '400'], [
+    {type, counter},
+    {desc, <<"number of HTTP 400 Bad Request responses">>}
+]}.
+{[couchdb, httpd_status_codes, '401'], [
+    {type, counter},
+    {desc, <<"number of HTTP 401 Unauthorized responses">>}
+]}.
+{[couchdb, httpd_status_codes, '403'], [
+    {type, counter},
+    {desc, <<"number of HTTP 403 Forbidden responses">>}
+]}.
+{[couchdb, httpd_status_codes, '404'], [
+    {type, counter},
+    {desc, <<"number of HTTP 404 Not Found responses">>}
+]}.
+{[couchdb, httpd_status_codes, '405'], [
+    {type, counter},
+    {desc, <<"number of HTTP 405 Method Not Allowed responses">>}
+]}.
+{[couchdb, httpd_status_codes, '409'], [
+    {type, counter},
+    {desc, <<"number of HTTP 409 Conflict responses">>}
+]}.
+{[couchdb, httpd_status_codes, '412'], [
+    {type, counter},
+    {desc, <<"number of HTTP 412 Precondition Failed responses">>}
+]}.
+{[couchdb, httpd_status_codes, '500'], [
+    {type, counter},
+    {desc, <<"number of HTTP 500 Internal Server Error responses">>}
+]}.
+{[couchdb, open_databases], [
+    {type, counter},
+    {desc,  <<"number of open databases">>}
+]}.
+{[couchdb, open_os_files], [
+    {type, counter},
+    {desc, <<"number of file descriptors CouchDB has open">>}
+]}.
+{[couchdb, request_time], [
+    {type, histogram},
+    {desc, <<"length of a request inside CouchDB without MochiWeb">>}
+]}.
+{[couchdb, couch_server, lru_skip], [
+    {type, counter},
+    {desc, <<"number of couch_server LRU operations skipped">>}
+]}.
+{[couchdb, couchjs, map_doc], [
+    {type, counter},
+    {desc, <<"number of documents mapped in the couchjs view server">>}
+]}.
+{[couchdb, couchjs, emits], [
+    {type, counter},
+    {desc, <<"number of invocations of `emit' in map functions in the couchjs view server">>}
+]}.

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch.app.src
----------------------------------------------------------------------
diff --git a/src/couch.app.src b/src/couch.app.src
index 30f3b2a..ba12f41 100644
--- a/src/couch.app.src
+++ b/src/couch.app.src
@@ -23,11 +23,27 @@
         couch_secondary_services,
         couch_server,
         couch_sup,
-        couch_stats_aggregator,
-        couch_stats_collector,
         couch_task_status
     ]},
     {mod, {couch_app, []}},
-    {applications, [kernel, stdlib, crypto, sasl, inets, oauth, ibrowse,
-        mochiweb, ssl, couch_log, couch_event, b64url]}
+    {applications, [
+        % stdlib
+        kernel,
+        stdlib,
+        crypto,
+        sasl,
+        inets,
+        ssl,
+
+        % Upstream deps
+        ibrowse,
+        mochiweb,
+        oauth,
+
+        % ASF deps
+        b64url,
+        couch_log,
+        couch_event,
+        couch_stats
+    ]}
 ]}.

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_auth_cache.erl
----------------------------------------------------------------------
diff --git a/src/couch_auth_cache.erl b/src/couch_auth_cache.erl
index d53cff0..8cf631b 100644
--- a/src/couch_auth_cache.erl
+++ b/src/couch_auth_cache.erl
@@ -102,7 +102,7 @@ get_from_cache(UserName) ->
             [] ->
                 gen_server:call(?MODULE, {fetch, UserName}, infinity);
             [{UserName, {Credentials, _ATime}}] ->
-                couch_stats_collector:increment({couchdb, auth_cache_hits}),
+                couch_stats:increment_counter([couchdb, auth_cache_hits]),
                 gen_server:cast(?MODULE, {cache_hit, UserName}),
                 Credentials
             end
@@ -182,11 +182,11 @@ handle_call({new_max_cache_size, NewSize}, _From, State) ->
 handle_call({fetch, UserName}, _From, State) ->
     {Credentials, NewState} = case ets:lookup(?BY_USER, UserName) of
     [{UserName, {Creds, ATime}}] ->
-        couch_stats_collector:increment({couchdb, auth_cache_hits}),
+        couch_stats:increment_counter([couchdb, auth_cache_hits]),
         cache_hit(UserName, Creds, ATime),
         {Creds, State};
     [] ->
-        couch_stats_collector:increment({couchdb, auth_cache_misses}),
+        couch_stats:increment_counter([couchdb, auth_cache_misses]),
         Creds = get_user_props_from_db(UserName),
         State1 = add_cache_entry(UserName, Creds, erlang:now(), State),
         {Creds, State1}

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 8a9f9e4..bd250ef 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -125,7 +125,8 @@ is_idle(#db{compactor_pid=nil, waiting_delayed_commit=nil} = Db) ->
     undefined ->
         true;
     {monitored_by, Pids} ->
-        (Pids -- [Db#db.main_pid, whereis(couch_stats_collector)]) =:= []
+        PidTracker = whereis(couch_stats_process_tracker),
+        (Pids -- [Db#db.main_pid, PidTracker]) =:= []
     end;
 is_idle(_Db) ->
     false.
@@ -172,7 +173,7 @@ open_doc(Db, IdOrDocInfo) ->
     open_doc(Db, IdOrDocInfo, []).
 
 open_doc(Db, Id, Options) ->
-    increment_stat(Db, {couchdb, database_reads}),
+    increment_stat(Db, [couchdb, database_reads]),
     case open_doc_int(Db, Id, Options) of
     {ok, #doc{deleted=true}=Doc} ->
         case lists:member(deleted, Options) of
@@ -221,7 +222,7 @@ find_ancestor_rev_pos({RevPos, [RevId|Rest]}, AttsSinceRevs) ->
     end.
 
 open_doc_revs(Db, Id, Revs, Options) ->
-    increment_stat(Db, {couchdb, database_reads}),
+    increment_stat(Db, [couchdb, database_reads]),
     [{ok, Results}] = open_doc_revs_int(Db, [{Id, Revs}], Options),
     {ok, [apply_open_options(Result, Options) || Result <- Results]}.
 
@@ -840,7 +841,7 @@ doc_tag(#doc{meta=Meta}) ->
     end.
 
 update_docs(Db, Docs0, Options, replicated_changes) ->
-    increment_stat(Db, {couchdb, database_writes}),
+    increment_stat(Db, [couchdb, database_writes]),
     Docs = tag_docs(Docs0),
     DocBuckets = before_docs_update(Db, group_alike_docs(Docs)),
 
@@ -867,7 +868,7 @@ update_docs(Db, Docs0, Options, replicated_changes) ->
     {ok, DocErrors};
 
 update_docs(Db, Docs0, Options, interactive_edit) ->
-    increment_stat(Db, {couchdb, database_writes}),
+    increment_stat(Db, [couchdb, database_writes]),
     AllOrNothing = lists:member(all_or_nothing, Options),
     Docs = tag_docs(Docs0),
 
@@ -960,6 +961,18 @@ set_commit_option(Options) ->
         [full_commit|Options]
     end.
 
+collect_results_with_metrics(Pid, MRef, []) ->
+    Begin = os:timestamp(),
+    try
+        collect_results(Pid, MRef, [])
+    after
+        ResultsTime = timer:now_diff(os:timestamp(), Begin) div 1000,
+        couch_stats:update_histogram(
+            [couchdb, collect_results_time],
+            ResultsTime
+        )
+    end.
+
 collect_results(Pid, MRef, ResultsAcc) ->
     receive
     {result, Pid, Result} ->
@@ -981,7 +994,7 @@ write_and_commit(#db{main_pid=Pid, user_ctx=Ctx}=Db, DocBuckets1,
     MRef = erlang:monitor(process, Pid),
     try
         Pid ! {update_docs, self(), DocBuckets, NonRepDocs, MergeConflicts, FullCommit},
-        case collect_results(Pid, MRef, []) of
+        case collect_results_with_metrics(Pid, MRef, []) of
         {ok, Results} -> {ok, Results};
         retry ->
             % This can happen if the db file we wrote to was swapped out by
@@ -995,7 +1008,7 @@ write_and_commit(#db{main_pid=Pid, user_ctx=Ctx}=Db, DocBuckets1,
             DocBuckets3 = prepare_doc_summaries(Db2, DocBuckets2),
             close(Db2),
             Pid ! {update_docs, self(), DocBuckets3, NonRepDocs, MergeConflicts, FullCommit},
-            case collect_results(Pid, MRef, []) of
+            case collect_results_with_metrics(Pid, MRef, []) of
             {ok, Results} -> {ok, Results};
             retry -> throw({update_error, compaction_retry})
             end
@@ -1338,7 +1351,7 @@ increment_stat(#db{options = Options}, Stat) ->
     true ->
         ok;
     false ->
-        couch_stats_collector:increment(Stat)
+        couch_stats:increment_counter(Stat)
     end.
 
 skip_deleted(FoldFun) ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 2c1e808..cd434df 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -60,6 +60,7 @@ init({DbName, Filepath, Fd, Options}) ->
         end
     end,
     Db = init_db(DbName, Filepath, Fd, Header, Options),
+    couch_stats_process_tracker:track([couchdb, open_databases]),
     % we don't load validation funs here because the fabric query is liable to
     % race conditions.  Instead see couch_db:validate_doc_update, which loads
     % them lazily
@@ -784,6 +785,16 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
     {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexFullDocInfos, []),
     {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, IndexFullDocInfos, RemoveSeqs),
 
+
+    WriteCount = length(IndexFullDocInfos),
+    couch_stats:increment_counter([couchdb, document_inserts],
+         WriteCount - length(RemoveSeqs)),
+    couch_stats:increment_counter([couchdb, document_writes], WriteCount),
+    couch_stats:increment_counter(
+        [couchdb, local_document_writes],
+        length(NonRepDocs)
+    ),
+
     Db3 = Db2#db{
         id_tree = DocInfoByIdBTree2,
         seq_tree = DocInfoBySeqBTree2,

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_file.erl
----------------------------------------------------------------------
diff --git a/src/couch_file.erl b/src/couch_file.erl
index 4fda603..be6b634 100644
--- a/src/couch_file.erl
+++ b/src/couch_file.erl
@@ -347,7 +347,7 @@ file_open_options(Options) ->
 maybe_track_open_os_files(Options) ->
     case not lists:member(sys_db, Options) of
         true ->
-            couch_stats_collector:track_process_count({couchdb, open_os_files});
+            couch_stats_process_tracker:track([couchdb, open_os_files]);
         false ->
             ok
     end.
@@ -574,7 +574,7 @@ split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) ->
     split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]).
 
 
-% System dbs aren't monitored by couch_stats_collector
+% System dbs aren't monitored by couch_stats_process_tracker
 is_idle(#file{is_sys=true}) ->
     case process_info(self(), monitored_by) of
         {monitored_by, []} -> true;

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_httpd.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd.erl b/src/couch_httpd.erl
index 0c5af59..32950eb 100644
--- a/src/couch_httpd.erl
+++ b/src/couch_httpd.erl
@@ -350,8 +350,8 @@ handle_request_int(MochiReq, DefaultFun,
             send_error(HttpReq, Error)
     end,
     RequestTime = round(timer:now_diff(os:timestamp(), Begin)/1000),
-    couch_stats_collector:record({couchdb, request_time}, RequestTime),
-    couch_stats_collector:increment({httpd, requests}),
+    couch_stats:update_histogram([couchdb, request_time], RequestTime),
+    couch_stats:increment_counter([httpd, requests]),
     {ok, Resp}.
 
 check_request_uri_length(Uri) ->
@@ -390,7 +390,7 @@ authenticate_request(Response, _AuthSrcs) ->
     Response.
 
 increment_method_stats(Method) ->
-    couch_stats_collector:increment({httpd_request_methods, Method}).
+    couch_stats:increment_counter([httpd_request_methods, Method]).
 
 validate_referer(Req) ->
     Host = host_for_request(Req),
@@ -614,7 +614,7 @@ log_request(#httpd{mochi_req=MochiReq,peer=Peer}=Req, Code) ->
 
 start_response_length(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Length) ->
     log_request(Req, Code),
-    couch_stats_collector:increment({httpd_status_codes, Code}),
+    couch_stats:increment_counter([httpd_status_codes, Code]),
     Headers1 = Headers ++ server_header() ++
                couch_httpd_auth:cookie_auth_header(Req, Headers),
     Headers2 = couch_httpd_cors:cors_headers(Req, Headers1),
@@ -627,7 +627,7 @@ start_response_length(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Length) ->
 
 start_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers) ->
     log_request(Req, Code),
-    couch_stats_collector:increment({httpd_status_codes, Code}),
+    couch_stats:increment_counter([httpd_status_codes, Code]),
     CookieHeader = couch_httpd_auth:cookie_auth_header(Req, Headers),
     Headers1 = Headers ++ server_header() ++ CookieHeader,
     Headers2 = couch_httpd_cors:cors_headers(Req, Headers1),
@@ -661,7 +661,7 @@ http_1_0_keep_alive(Req, Headers) ->
 
 start_chunked_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers) ->
     log_request(Req, Code),
-    couch_stats_collector:increment({httpd_status_codes, Code}),
+    couch_stats:increment_counter([httpd_status_codes, Code]),
     Headers1 = http_1_0_keep_alive(MochiReq, Headers),
     Headers2 = Headers1 ++ server_header() ++
                couch_httpd_auth:cookie_auth_header(Req, Headers1),
@@ -686,7 +686,7 @@ last_chunk(Resp) ->
 
 send_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Body) ->
     log_request(Req, Code),
-    couch_stats_collector:increment({httpd_status_codes, Code}),
+    couch_stats:increment_counter([httpd_status_codes, Code]),
     Headers1 = http_1_0_keep_alive(MochiReq, Headers),
     if Code >= 500 ->
         ?LOG_ERROR("httpd ~p error response:~n ~s", [Code, Body]);

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_httpd_cors.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_cors.erl b/src/couch_httpd_cors.erl
index 15b838e..66878d0 100644
--- a/src/couch_httpd_cors.erl
+++ b/src/couch_httpd_cors.erl
@@ -174,7 +174,7 @@ handle_preflight_request(Origin, Host, MochiReq) ->
 
 send_preflight_response(#httpd{mochi_req=MochiReq}=Req, Headers) ->
     couch_httpd:log_request(Req, 204),
-    couch_stats_collector:increment({httpd_status_codes, 204}),
+    couch_stats:increment_counter([httpd_status_codes, 204]),
     Headers1 = couch_httpd:http_1_0_keep_alive(MochiReq, Headers),
     Headers2 = Headers1 ++ couch_httpd:server_header() ++
                couch_httpd_auth:cookie_auth_header(Req, Headers1),

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_httpd_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_db.erl b/src/couch_httpd_db.erl
index 77d8788..4fdc9aa 100644
--- a/src/couch_httpd_db.erl
+++ b/src/couch_httpd_db.erl
@@ -145,16 +145,8 @@ handle_changes_req2(Req, Db) ->
             FeedChangesFun(MakeCallback(Resp))
         end
     end,
-    couch_stats_collector:increment(
-        {httpd, clients_requesting_changes}
-    ),
-    try
-        WrapperFun(ChangesFun)
-    after
-    couch_stats_collector:decrement(
-        {httpd, clients_requesting_changes}
-    )
-    end.
+    couch_stats_process_tracker:track([httpd, clients_requesting_changes]),
+    WrapperFun(ChangesFun).
 
 handle_compact_req(#httpd{method='POST'}=Req, Db) ->
     case Req#httpd.path_parts of
@@ -293,7 +285,7 @@ db_req(#httpd{path_parts=[_,<<"_ensure_full_commit">>]}=Req, _Db) ->
     send_method_not_allowed(Req, "POST");
 
 db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) ->
-    couch_stats_collector:increment({httpd, bulk_requests}),
+    couch_stats:increment_counter([httpd, bulk_requests]),
     couch_httpd:validate_ctype(Req, "application/json"),
     {JsonProps} = couch_httpd:json_body_obj(Req),
     case couch_util:get_value(<<"docs">>, JsonProps) of

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_httpd_stats_handlers.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_stats_handlers.erl b/src/couch_httpd_stats_handlers.erl
index cd357ea..88376ab 100644
--- a/src/couch_httpd_stats_handlers.erl
+++ b/src/couch_httpd_stats_handlers.erl
@@ -11,46 +11,43 @@
 % the License.
 
 -module(couch_httpd_stats_handlers).
--include_lib("couch/include/couch_db.hrl").
+-include("couch_db.hrl").
 
 -export([handle_stats_req/1]).
--import(couch_httpd, [
-    send_json/2, send_json/3, send_json/4, send_method_not_allowed/2,
-    start_json_response/2, send_chunk/2, end_json_response/1,
-    start_chunked_response/3, send_error/4
-]).
 
 handle_stats_req(#httpd{method='GET', path_parts=[_]}=Req) ->
-    flush(Req),
-    send_json(Req, couch_stats_aggregator:all(range(Req)));
-
-handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod]}) ->
-    throw({bad_request, <<"Stat names must have exactly two parts.">>});
-
-handle_stats_req(#httpd{method='GET', path_parts=[_, Mod, Key]}=Req) ->
-    flush(Req),
-    Stats = couch_stats_aggregator:get_json({list_to_atom(binary_to_list(Mod)),
-        list_to_atom(binary_to_list(Key))}, range(Req)),
-    send_json(Req, {[{Mod, {[{Key, Stats}]}}]});
-
-handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod, _Key | _Extra]}) ->
-    throw({bad_request, <<"Stat names must have exactly two parts.">>});
-
-handle_stats_req(Req) ->
-    send_method_not_allowed(Req, "GET").
-
-range(Req) ->
-    case couch_util:get_value("range", couch_httpd:qs(Req)) of
-        undefined ->
-            0;
-        Value ->
-            list_to_integer(Value)
-    end.
-
-flush(Req) ->
-    case couch_util:get_value("flush", couch_httpd:qs(Req)) of
-        "true" ->
-            couch_stats_aggregator:collect_sample();
-        _Else ->
-            ok
-    end.
+    Stats = couch_stats:fetch(),
+    Nested = nest(Stats),
+    EJSON = to_ejson(Nested),
+    couch_httpd:send_json(Req, EJSON).
+
+nest(Proplist) ->
+    nest(Proplist, []).
+
+nest([], Acc) ->
+    Acc;
+nest([{[Key|Keys], Value}|Rest], Acc) ->
+    Acc1 = case proplists:lookup(Key, Acc) of
+        {Key, Old} ->
+            [{Key, nest([{Keys, Value}], Old)}|proplists:delete(Key, Acc)];
+        none ->
+            Term = lists:foldr(fun(K, A) -> [{K, A}] end, Value, Keys),
+            [{Key, Term}|Acc]
+    end,
+    nest(Rest, Acc1).
+
+to_ejson([{_, _}|_]=Proplist) ->
+    EJSONProps = lists:map(
+       fun({Key, Value}) -> {maybe_format_key(Key), to_ejson(Value)} end,
+       Proplist
+    ),
+    {EJSONProps};
+to_ejson(NotAProplist) ->
+    NotAProplist.
+
+maybe_format_key(Key) when is_integer(Key) ->
+    maybe_format_key(integer_to_list(Key));
+maybe_format_key(Key) when is_list(Key) ->
+    list_to_binary(Key);
+maybe_format_key(Key) ->
+    Key.
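
For illustration (the stat values here are invented), feeding the new
nest/1 two flat entries yields the nested proplist that to_ejson/1 then
converts:

    1> nest([{[couchdb, httpd, requests], 10},
             {[couchdb, open_databases], 2}]).
    [{couchdb,[{open_databases,2},{httpd,[{requests,10}]}]}]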

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_lru.erl
----------------------------------------------------------------------
diff --git a/src/couch_lru.erl b/src/couch_lru.erl
index ad432ec..d58eb69 100644
--- a/src/couch_lru.erl
+++ b/src/couch_lru.erl
@@ -52,6 +52,7 @@ close_int({Lru, DbName, Iter}, {Tree, Dict} = Cache) ->
             {gb_trees:delete(Lru, Tree), dict:erase(DbName, Dict)};
         false ->
             true = ets:update_element(couch_dbs, DbName, {#db.fd_monitor, nil}),
+            couch_stats:increment_counter([couchdb, couch_server, lru_skip]),
             close_int(gb_trees:next(Iter), update(DbName, Cache))
         end;
     false ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_query_servers.erl
----------------------------------------------------------------------
diff --git a/src/couch_query_servers.erl b/src/couch_query_servers.erl
index c84ff7e..13b0b91 100644
--- a/src/couch_query_servers.erl
+++ b/src/couch_query_servers.erl
@@ -76,6 +76,7 @@ map_docs(Proc, Docs) ->
             FunsResults)
         end,
         Docs),
+    couch_stats:increment_counter([couchdb, couchjs, map_doc], length(Docs)),
     {ok, Results}.
 
 map_doc_raw(Proc, Doc) ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_server.erl
----------------------------------------------------------------------
diff --git a/src/couch_server.erl b/src/couch_server.erl
index 964ecad..66a3c74 100644
--- a/src/couch_server.erl
+++ b/src/couch_server.erl
@@ -345,8 +345,8 @@ handle_call({open_result, DbName, {ok, Db}}, {FromPid, _Tag}, Server) ->
     link(Db#db.main_pid),
     true = ets:delete(couch_dbs_pid_to_name, FromPid),
     case erase({async_open, DbName}) of undefined -> ok; T0 ->
-        ?LOG_INFO("needed ~p ms to open new ~s", [timer:now_diff(os:timestamp(),T0)/1000,
-            DbName])
+        OpenTime = timer:now_diff(os:timestamp(), T0) / 1000,
+        couch_stats:update_histogram([couchdb, db_open_time], OpenTime)
     end,
     % icky hack of field values - compactor_pid used to store clients
     % and fd used to possibly store a creation request
@@ -363,8 +363,6 @@ handle_call({open_result, DbName, {ok, Db}}, {FromPid, _Tag}, Server) ->
     true = ets:insert(couch_dbs_pid_to_name, {Db#db.main_pid, DbName}),
     Lru = case couch_db:is_system_db(Db) of
         false ->
-            Stat = {couchdb, open_databases},
-            couch_stats_collector:track_process_count(Db#db.main_pid, Stat),
             couch_lru:insert(DbName, Server#server.lru);
         true ->
             Server#server.lru

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_stats_aggregator.erl
----------------------------------------------------------------------
diff --git a/src/couch_stats_aggregator.erl b/src/couch_stats_aggregator.erl
deleted file mode 100644
index 45987d6..0000000
--- a/src/couch_stats_aggregator.erl
+++ /dev/null
@@ -1,312 +0,0 @@
-% Licensed under the Apache License, Version 2.0 (the "License"); you may not
-% use this file except in compliance with the License. You may obtain a copy of
-% the License at
-%
-%   http://www.apache.org/licenses/LICENSE-2.0
-%
-% Unless required by applicable law or agreed to in writing, software
-% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-% License for the specific language governing permissions and limitations under
-% the License.
-
--module(couch_stats_aggregator).
--behaviour(gen_server).
--behaviour(config_listener).
-
--export([start/0, start/1, stop/0]).
--export([all/0, all/1, get/1, get/2, get_json/1, get_json/2, collect_sample/0]).
-
--export([init/1, terminate/2, code_change/3]).
--export([handle_call/3, handle_cast/2, handle_info/2]).
-
-% config_listener api
--export([handle_config_change/5]).
-
-
--record(aggregate, {
-    description = <<"">>,
-    seconds = 0,
-    count = 0,
-    current = null,
-    sum = null,
-    mean = null,
-    variance = null,
-    stddev = null,
-    min = null,
-    max = null,
-    samples = []
-}).
-
-
-start() ->
-    PrivDir = couch_util:priv_dir(),
-    start(filename:join(PrivDir, "stat_descriptions.cfg")).
-    
-start(FileName) ->
-    gen_server:start_link({local, ?MODULE}, ?MODULE, [FileName], []).
-
-stop() ->
-    gen_server:cast(?MODULE, stop).
-
-all() ->
-    ?MODULE:all(0).
-all(Time) when is_binary(Time) ->
-    ?MODULE:all(list_to_integer(binary_to_list(Time)));
-all(Time) when is_atom(Time) ->
-    ?MODULE:all(list_to_integer(atom_to_list(Time)));
-all(Time) when is_integer(Time) ->
-    Aggs = ets:match(?MODULE, {{'$1', Time}, '$2'}),
-    Stats = lists:map(fun([Key, Agg]) -> {Key, Agg} end, Aggs),
-    case Stats of
-        [] ->
-            {[]};
-        _ ->
-            Ret = lists:foldl(fun({{Mod, Key}, Agg}, Acc) ->
-                CurrKeys = case proplists:lookup(Mod, Acc) of
-                    none -> [];
-                    {Mod, {Keys}} -> Keys
-                end,
-                NewMod = {[{Key, to_json_term(Agg)} | CurrKeys]},
-                [{Mod, NewMod} | proplists:delete(Mod, Acc)]
-            end, [], Stats),
-            {Ret}
-    end.
-
-get(Key) ->
-    ?MODULE:get(Key, 0).
-get(Key, Time) when is_binary(Time) ->
-    ?MODULE:get(Key, list_to_integer(binary_to_list(Time)));
-get(Key, Time) when is_atom(Time) ->
-    ?MODULE:get(Key, list_to_integer(atom_to_list(Time)));
-get(Key, Time) when is_integer(Time) ->
-    case ets:lookup(?MODULE, {make_key(Key), Time}) of
-        [] -> #aggregate{seconds=Time};
-        [{_, Agg}] -> Agg
-    end.
-
-get_json(Key) ->
-    get_json(Key, 0).
-get_json(Key, Time) ->
-    to_json_term(?MODULE:get(Key, Time)).
-
-collect_sample() ->
-    gen_server:call(?MODULE, collect_sample, infinity).
-
-
-init(StatDescsFileName) ->
-    % Create an aggregate entry for each {description, rate} pair.
-    ets:new(?MODULE, [named_table, set, protected]),
-    SampleStr = config:get("stats", "samples", "[0]"),
-    {ok, Samples} = couch_util:parse_term(SampleStr),
-    {ok, Descs} = file:consult(StatDescsFileName),
-    lists:foreach(fun({Sect, Key, Value}) ->
-        lists:foreach(fun(Secs) ->
-            Agg = #aggregate{
-                description=list_to_binary(Value),
-                seconds=Secs
-            },
-            ets:insert(?MODULE, {{{Sect, Key}, Secs}, Agg})
-        end, Samples)
-    end, Descs),
-    
-    ok = config:listen_for_changes(?MODULE, nil),
-    
-    Rate = list_to_integer(config:get("stats", "rate", "1000")),
-    % TODO: Add timer_start to kernel start options.
-    {ok, TRef} = timer:apply_after(Rate, ?MODULE, collect_sample, []),
-    {ok, {TRef, Rate}}.
-    
-terminate(_Reason, {TRef, _Rate}) ->
-    timer:cancel(TRef),
-    ok.
-
-handle_call(collect_sample, _, {OldTRef, SampleInterval}) ->
-    timer:cancel(OldTRef),
-    {ok, TRef} = timer:apply_after(SampleInterval, ?MODULE, collect_sample, []),
-    % Gather new stats values to add.
-    Incs = lists:map(fun({Key, Value}) ->
-        {Key, {incremental, Value}}
-    end, couch_stats_collector:all(incremental)),
-    Abs = lists:map(fun({Key, Values}) ->
-        couch_stats_collector:clear(Key),
-        Values2 = case Values of
-            X when is_list(X) -> X;
-            Else -> [Else]
-        end,
-        {_, Mean} = lists:foldl(fun(Val, {Count, Curr}) ->
-            {Count+1, Curr + (Val - Curr) / (Count+1)}
-        end, {0, 0}, Values2),
-        {Key, {absolute, Mean}}
-    end, couch_stats_collector:all(absolute)),
-    
-    Values = Incs ++ Abs,
-    Now = os:timestamp(),
-    lists:foreach(fun({{Key, Rate}, Agg}) ->
-        NewAgg = case proplists:lookup(Key, Values) of
-            none ->
-                rem_values(Now, Agg);
-            {Key, {Type, Value}} ->
-                NewValue = new_value(Type, Value, Agg#aggregate.current),
-                Agg2 = add_value(Now, NewValue, Agg),
-                rem_values(Now, Agg2)
-        end,
-        ets:insert(?MODULE, {{Key, Rate}, NewAgg})
-    end, ets:tab2list(?MODULE)),
-    {reply, ok, {TRef, SampleInterval}}.
-
-handle_cast(stop, State) ->
-    {stop, normal, State}.
-
-handle_info({gen_event_EXIT, {config_listener, ?MODULE}, _Reason}, State) ->
-    erlang:send_after(5000, self(), restart_config_listener),
-    {noreply, State};
-handle_info(restart_config_listener, State) ->
-    ok = config:listen_for_changes(?MODULE, nil),
-    {noreply, State};
-handle_info(_Info, State) ->
-    {noreply, State}.
-
-code_change(_OldVersion, State, _Extra) ->
-    {ok, State}.
-
-
-handle_config_change("stats", _, _, _, _) ->
-    exit(whereis(?MODULE), config_change),
-    remove_handler;
-handle_config_change(_, _, _, _, _) ->
-    {ok, nil}.
-
-
-new_value(incremental, Value, null) ->
-    Value;
-new_value(incremental, Value, Current) ->
-    Value - Current;
-new_value(absolute, Value, _Current) ->
-    Value.
-
-add_value(Time, Value, #aggregate{count=Count, seconds=Secs}=Agg) when Count < 1 ->
-    Samples = case Secs of
-        0 -> [];
-        _ -> [{Time, Value}]
-    end,
-    Agg#aggregate{
-        count=1,
-        current=Value,
-        sum=Value,
-        mean=Value,
-        variance=0.0,
-        stddev=null,
-        min=Value,
-        max=Value,
-        samples=Samples
-    };
-add_value(Time, Value, Agg) ->
-    #aggregate{
-        count=Count,
-        current=Current,
-        sum=Sum,
-        mean=Mean,
-        variance=Variance,
-        samples=Samples
-    } = Agg,
-    
-    NewCount = Count + 1,
-    NewMean = Mean + (Value - Mean) / NewCount,
-    NewVariance = Variance + (Value - Mean) * (Value - NewMean),
-    StdDev = case NewCount > 1 of
-        false -> null;
-        _ -> math:sqrt(NewVariance / (NewCount - 1))
-    end,
-    Agg2 = Agg#aggregate{
-        count=NewCount,
-        current=Current + Value,
-        sum=Sum + Value,
-        mean=NewMean,
-        variance=NewVariance,
-        stddev=StdDev,
-        min=lists:min([Agg#aggregate.min, Value]),
-        max=lists:max([Agg#aggregate.max, Value])
-    },
-    case Agg2#aggregate.seconds of
-        0 -> Agg2;
-        _ -> Agg2#aggregate{samples=[{Time, Value} | Samples]}
-    end.
-
-rem_values(Time, Agg) ->
-    Seconds = Agg#aggregate.seconds,
-    Samples = Agg#aggregate.samples,
-    Pred = fun({When, _Value}) ->
-        timer:now_diff(Time, When) =< (Seconds * 1000000)
-    end,
-    {Keep, Remove} = lists:splitwith(Pred, Samples),
-    Agg2 = lists:foldl(fun({_, Value}, Acc) ->
-        rem_value(Value, Acc)
-    end, Agg, Remove),
-    Agg2#aggregate{samples=Keep}.
-
-rem_value(_Value, #aggregate{count=Count, seconds=Secs}) when Count =< 1 ->
-    #aggregate{seconds=Secs};
-rem_value(Value, Agg) ->
-    #aggregate{
-        count=Count,
-        sum=Sum,
-        mean=Mean,
-        variance=Variance
-    } = Agg,
-
-    OldMean = (Mean * Count - Value) / (Count - 1),
-    OldVariance = Variance - (Value - OldMean) * (Value - Mean),
-    OldCount = Count - 1,
-    StdDev = case OldCount > 1 of
-        false -> null;
-        _ -> math:sqrt(clamp_value(OldVariance / (OldCount - 1)))
-    end,
-    Agg#aggregate{
-        count=OldCount,
-        sum=Sum-Value,
-        mean=clamp_value(OldMean),
-        variance=clamp_value(OldVariance),
-        stddev=StdDev
-    }.
-
-to_json_term(Agg) ->
-    {Min, Max} = case Agg#aggregate.seconds > 0 of
-        false ->
-            {Agg#aggregate.min, Agg#aggregate.max};
-        _ ->
-            case length(Agg#aggregate.samples) > 0 of
-                true ->
-                    Extract = fun({_Time, Value}) -> Value end,
-                    Samples = lists:map(Extract, Agg#aggregate.samples),
-                    {lists:min(Samples), lists:max(Samples)};
-                _ ->
-                    {null, null}
-            end
-    end,
-    {[
-        {description, Agg#aggregate.description},
-        {current, round_value(Agg#aggregate.sum)},
-        {sum, round_value(Agg#aggregate.sum)},
-        {mean, round_value(Agg#aggregate.mean)},
-        {stddev, round_value(Agg#aggregate.stddev)},
-        {min, Min},
-        {max, Max}
-    ]}.
-
-make_key({Mod, Val}) when is_integer(Val) ->
-    {Mod, list_to_atom(integer_to_list(Val))};
-make_key(Key) ->
-    Key.
-
-round_value(Val) when not is_number(Val) ->
-    Val;
-round_value(Val) when Val == 0 ->
-    Val;
-round_value(Val) ->
-    erlang:round(Val * 1000.0) / 1000.0.
-
-clamp_value(Val) when Val > 0.00000000000001 ->
-    Val;
-clamp_value(_) ->
-    0.0.

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/f1013bfa/src/couch_stats_collector.erl
----------------------------------------------------------------------
diff --git a/src/couch_stats_collector.erl b/src/couch_stats_collector.erl
deleted file mode 100644
index 5bf4864..0000000
--- a/src/couch_stats_collector.erl
+++ /dev/null
@@ -1,133 +0,0 @@
-% Licensed under the Apache License, Version 2.0 (the "License"); you may not
-% use this file except in compliance with the License. You may obtain a copy of
-% the License at
-%
-%   http://www.apache.org/licenses/LICENSE-2.0
-%
-% Unless required by applicable law or agreed to in writing, software
-% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-% License for the specific language governing permissions and limitations under
-% the License.
-
-% todo
-% - remove existance check on increment(), decrement() and record(). have
-%   modules initialize counters on startup.
-
--module(couch_stats_collector).
-
--behaviour(gen_server).
--vsn(1).
-
--export([start/0, stop/0]).
--export([all/0, all/1, get/1, increment/1, decrement/1, record/2, clear/1]).
--export([track_process_count/1, track_process_count/2]).
-
--export([init/1, terminate/2, code_change/3]).
--export([handle_call/3, handle_cast/2, handle_info/2]).
-
--define(HIT_TABLE, stats_hit_table).
--define(ABS_TABLE, stats_abs_table).
-
-start() ->
-    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
-
-stop() ->
-    gen_server:call(?MODULE, stop).
-
-all() ->
-    ets:tab2list(?HIT_TABLE) ++ abs_to_list().
-
-all(Type) ->
-    case Type of
-        incremental -> ets:tab2list(?HIT_TABLE);
-        absolute -> abs_to_list()
-    end.
-
-get(Key) ->
-    case ets:lookup(?HIT_TABLE, Key) of
-        [] ->
-            case ets:lookup(?ABS_TABLE, Key) of
-                [] ->
-                    nil;
-                AbsVals ->
-                    lists:map(fun({_, Value}) -> Value end, AbsVals)
-            end;
-        [{_, Counter}] ->
-            Counter
-    end.
-
-increment(Key) ->
-    Key2 = make_key(Key),
-    case catch ets:update_counter(?HIT_TABLE, Key2, 1) of
-        {'EXIT', {badarg, _}} ->
-            catch ets:insert(?HIT_TABLE, {Key2, 1}),
-            ok;
-        _ ->
-            ok
-    end.
-
-decrement(Key) ->
-    Key2 = make_key(Key),
-    case catch ets:update_counter(?HIT_TABLE, Key2, -1) of
-        {'EXIT', {badarg, _}} ->
-            catch ets:insert(?HIT_TABLE, {Key2, -1}),
-            ok;
-        _ -> ok
-    end.
-
-record(Key, Value) ->
-    catch ets:insert(?ABS_TABLE, {make_key(Key), Value}).
-
-clear(Key) ->
-    catch ets:delete(?ABS_TABLE, make_key(Key)).
-
-track_process_count(Stat) ->
-    track_process_count(self(), Stat).
-
-track_process_count(Pid, Stat) ->
-    ok = couch_stats_collector:increment(Stat),
-    gen_server:cast(?MODULE, {track_process_count, Pid, Stat}).
-
-
-init(_) ->
-    ets:new(?HIT_TABLE, [named_table, set, public]),
-    ets:new(?ABS_TABLE, [named_table, duplicate_bag, public]),
-    {ok, dict:new()}.
-
-terminate(_Reason, _State) ->
-    ok.
-
-handle_call(stop, _, State) ->
-    {stop, normal, stopped, State}.
-
-handle_cast({track_process_count, Pid, Stat}, State) ->
-    Ref = erlang:monitor(process, Pid),
-    {noreply, dict:store(Ref, Stat, State)}.
-
-handle_info({'DOWN', Ref, _, _, _}, State) ->
-    Stat = dict:fetch(Ref, State),
-    couch_stats_collector:decrement(Stat),
-    {noreply, dict:erase(Ref, State)}.
-
-code_change(_, State, _Extra) ->
-    {ok, State}.
-
-
-make_key({Module, Key}) when is_integer(Key) ->
-    {Module, list_to_atom(integer_to_list(Key))};
-make_key(Key) ->
-    Key.
-
-abs_to_list() ->
-    SortedKVs = lists:sort(ets:tab2list(?ABS_TABLE)),
-    lists:foldl(fun({Key, Val}, Acc) ->
-        case Acc of
-            [] ->
-                [{Key, [Val]}];
-            [{Key, Prev} | Rest] ->
-                [{Key, [Val | Prev]} | Rest];
-            Others ->
-                [{Key, [Val]} | Others]
-        end
-    end, [], SortedKVs).


[20/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Detect clustered dbs based on a missing id_tree

This function is called in the HTTP coordinator, so the usual trick of
detecting shard files based on the `shards/` prefix doesn't work.
Instead we can rely on the fact that the id_tree field is undefined.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/4f9cb355
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/4f9cb355
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/4f9cb355

Branch: refs/heads/windsor-merge
Commit: 4f9cb3551c84202bbd320588d47d3152dd9d6067
Parents: f1013bf
Author: Paul J. Davis <pa...@gmail.com>
Authored: Thu Aug 21 23:30:53 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_changes.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/4f9cb355/src/couch_changes.erl
----------------------------------------------------------------------
diff --git a/src/couch_changes.erl b/src/couch_changes.erl
index 637a6ba..b5e8f89 100644
--- a/src/couch_changes.erl
+++ b/src/couch_changes.erl
@@ -225,9 +225,9 @@ check_docids(_) ->
     throw({bad_request, Msg}).
 
 
-open_ddoc(#db{name= <<"shards/", _/binary>> =ShardName}, DDocId) ->
+open_ddoc(#db{name=DbName, id_tree=undefined}, DDocId) ->
     {_, Ref} = spawn_monitor(fun() ->
-        exit(fabric:open_doc(mem3:dbname(ShardName), DDocId, []))
+        exit(fabric:open_doc(mem3:dbname(DbName), DDocId, [ejson_body]))
     end),
     receive
         {'DOWN', Ref, _, _, {ok, _}=Response} ->


[03/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Add doc ID for last merged key tree to proc dict

This is for debugging purposes, i.e. for operators to get the ID of a
heavily conflicted document that is taking a long time to merge.

BugzID: 28940


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/dfe0e658
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/dfe0e658
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/dfe0e658

Branch: refs/heads/windsor-merge
Commit: dfe0e658d8c50831033b20825343773c7af543b9
Parents: 6083a37
Author: Benjamin Bastian <be...@gmail.com>
Authored: Mon Mar 10 10:29:36 2014 -0400
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_db_updater.erl | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/dfe0e658/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 88fa3b3..504c21e 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -629,6 +629,7 @@ merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) ->
     {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq};
 merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
         [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
+    erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging
     NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
         merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts)
     end, OldDocInfo, NewDocs),
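
For illustration, an operator could then read the key from a remote
shell; UpdaterPid below is a placeholder for the couch_db_updater pid
of interest:

    {dictionary, D} = process_info(UpdaterPid, dictionary),
    proplists:get_value(last_id_merged, D).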


[12/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Use the correct record when updating ddoc_keys.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/be3b2e9a
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/be3b2e9a
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/be3b2e9a

Branch: refs/heads/windsor-merge
Commit: be3b2e9ae5e7443805d9d682eb8814a9d8166eaa
Parents: 245a683
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 12:56:59 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_proc_manager.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/be3b2e9a/src/couch_proc_manager.erl
----------------------------------------------------------------------
diff --git a/src/couch_proc_manager.erl b/src/couch_proc_manager.erl
index 18c82e6..3f34a64 100644
--- a/src/couch_proc_manager.erl
+++ b/src/couch_proc_manager.erl
@@ -351,7 +351,7 @@ teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc_int{ddoc_keys=Keys}=Proc) ->
     % because the query server overwrites without the rev
     Keys2 = [{D,R} || {D,R} <- Keys, D /= DDocId],
     % add ddoc to the proc
-    {ok, Proc#proc{ddoc_keys=[DDocKey|Keys2]}}.
+    {ok, Proc#proc_int{ddoc_keys=[DDocKey|Keys2]}}.
 
 make_proc(Pid, Lang, Mod) ->
     Proc = #proc_int{


[13/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Update the task if any props change

This ignores the frequency setting and just updates forcefully on any
change to the task properties.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/296b98a4
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/296b98a4
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/296b98a4

Branch: refs/heads/windsor-merge
Commit: 296b98a4dc09679375424bbd5ac53224808fcbcd
Parents: fdbf505
Author: Paul J. Davis <pa...@gmail.com>
Authored: Tue Aug 12 15:42:35 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_task_status.erl | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/296b98a4/src/couch_task_status.erl
----------------------------------------------------------------------
diff --git a/src/couch_task_status.erl b/src/couch_task_status.erl
index 8a8dbf2..8adba6b 100644
--- a/src/couch_task_status.erl
+++ b/src/couch_task_status.erl
@@ -68,9 +68,14 @@ set_update_frequency(Msecs) ->
 
 update(Props) ->
     MergeProps = lists:ukeysort(1, Props),
-    TaskProps = lists:ukeymerge(1, MergeProps, erlang:get(task_status_props)),
-    put(task_status_props, TaskProps),
-    maybe_persist(TaskProps).
+    CurrProps = erlang:get(task_status_props),
+    TaskProps = lists:ukeymerge(1, MergeProps, CurrProps),
+    case TaskProps == CurrProps of
+        true ->
+            maybe_persist(TaskProps);
+        false ->
+            persist(TaskProps)
+    end.
 
 
 get(Props) when is_list(Props) ->
@@ -81,18 +86,22 @@ get(Prop) ->
     couch_util:get_value(Prop, TaskProps).
 
 
-maybe_persist(TaskProps0) ->
+maybe_persist(TaskProps) ->
     {LastUpdateTime, Frequency} = erlang:get(task_status_update),
     case timer:now_diff(Now = os:timestamp(), LastUpdateTime) >= Frequency of
     true ->
         put(task_status_update, {Now, Frequency}),
-        TaskProps = ?set(TaskProps0, updated_on, timestamp(Now)),
-        gen_server:cast(?MODULE, {update_status, self(), TaskProps});
+        persist(TaskProps);
     false ->
         ok
     end.
 
 
+persist(TaskProps0) ->
+    TaskProps = ?set(TaskProps0, updated_on, timestamp(os:timestamp())),
+    gen_server:cast(?MODULE, {update_status, self(), TaskProps}).
+
+
 init([]) ->
     % read configuration settings and register for configuration changes
     ets:new(?MODULE, [ordered_set, protected, named_table]),


[16/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Store the language as a binary in ets

This was breaking pattern matching, so OS processes were never reused. The
new hard limit in CouchDB caused the bug to manifest where we had not seen
it previously.
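
A small sketch of the mismatch (the language value is assumed, for
illustration only):

    Lang = javascript,                        %% what make_proc/3 received
    Stored = <<"javascript">>,                %% what the ets table held
    false = (Lang =:= Stored),                %% an atom never matches a binary,
                                              %% so the lookup always missed
    true = (couch_util:to_binary(Lang) =:= Stored).   %% after the fix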


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/3755bb66
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/3755bb66
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/3755bb66

Branch: refs/heads/windsor-merge
Commit: 3755bb663ea09d4328f35f57062479776ae78162
Parents: 9009e5f
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 14:03:41 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_proc_manager.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/3755bb66/src/couch_proc_manager.erl
----------------------------------------------------------------------
diff --git a/src/couch_proc_manager.erl b/src/couch_proc_manager.erl
index 541686c..33ef06c 100644
--- a/src/couch_proc_manager.erl
+++ b/src/couch_proc_manager.erl
@@ -355,7 +355,7 @@ teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc_int{ddoc_keys=Keys}=Proc) ->
 
 make_proc(Pid, Lang, Mod) ->
     Proc = #proc_int{
-        lang = Lang,
+        lang = couch_util:to_binary(Lang),
         pid = Pid,
         prompt_fun = {Mod, prompt},
         set_timeout_fun = {Mod, set_timeout},


[02/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
squashme! ignore all parameters in content-type, not just exactly one (grgrgrgrgrgrgrgrrr!!!)
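
For context, string:tokens/2 splits on every ";", so a Content-Type header
with two or more parameters produces more than two tokens (the header value
below is assumed, for illustration):

    string:tokens("application/json;charset=utf-8;profile=x", ";").
    %% -> ["application/json","charset=utf-8","profile=x"]
    %% [Ctype, _Rest] matches only a two-element list; the patch switches to
    %% [Ctype | _Rest], which matches the head plus any number of parameters.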


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/c4845204
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/c4845204
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/c4845204

Branch: refs/heads/windsor-merge
Commit: c48452040a08431df560d080f0ebca7a9c490ad6
Parents: 439db65
Author: Robert Newson <rn...@apache.org>
Authored: Fri Aug 8 14:57:51 2014 +0100
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_httpd.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/c4845204/src/couch_httpd.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd.erl b/src/couch_httpd.erl
index 8022a02..0c5af59 100644
--- a/src/couch_httpd.erl
+++ b/src/couch_httpd.erl
@@ -413,7 +413,7 @@ validate_ctype(Req, Ctype) ->
     ReqCtype ->
         case string:tokens(ReqCtype, ";") of
         [Ctype] -> ok;
-        [Ctype, _Rest] -> ok;
+        [Ctype | _Rest] -> ok;
         _Else ->
             throw({bad_ctype, "Content-Type must be "++Ctype})
         end


[04/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Always report why couch_server crashes

On servers with a large number of open databases, the couch_server state
is too large to be logged, so when it crashes we miss the reason for the
crash. This patch adds an extra log message that redacts enough of the
server state to ensure a properly-formatted message.
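
A minimal sketch of the approach (the helper name is hypothetical; the
record fields are as shown in the diff below):

    %% Replace the unbounded fields with the atom 'redacted' so the
    %% remaining record is small enough to format and log.
    log_terminate(Reason, #server{} = Srv) ->
        Redacted = Srv#server{dbname_regexp = redacted, lru = redacted},
        ?LOG_ERROR("couch_server terminating with ~p, state ~2048p",
                   [Reason, Redacted]).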

BugzID: 26902


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/6083a37b
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/6083a37b
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/6083a37b

Branch: refs/heads/windsor-merge
Commit: 6083a37b87093d314adb491bab6d2e81e7c71610
Parents: f3e4538
Author: Adam Kocoloski <ad...@cloudant.com>
Authored: Fri Feb 28 12:45:02 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:42:38 2014 +0100

----------------------------------------------------------------------
 src/couch_server.erl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/6083a37b/src/couch_server.erl
----------------------------------------------------------------------
diff --git a/src/couch_server.erl b/src/couch_server.erl
index 28dfc79..964ecad 100644
--- a/src/couch_server.erl
+++ b/src/couch_server.erl
@@ -210,7 +210,9 @@ init([]) ->
                 update_lru_on_read=UpdateLruOnRead,
                 start_time=couch_util:rfc1123_date()}}.
 
-terminate(_Reason, _Srv) ->
+terminate(Reason, Srv) ->
+    ?LOG_ERROR("couch_server terminating with ~p, state ~2048p", [Reason,
+         Srv#server{dbname_regexp = redacted, lru = redacted}]),
     ets:foldl(fun(#db{main_pid=Pid}, _) -> couch_util:shutdown_sync(Pid) end,
         nil, couch_dbs),
     ok.


[21/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Match the old ordering for attachments

This affects the case where we're updating attachments and need to merge
updated attachments onto a non-deleted document. The old code used a
lists:map/2, whereas the new code is a tail-recursive approach. This just
reverses the accumulator to match the order we had previously.
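
A hypothetical mini version of the loop, showing why the reverse is needed:

    merge_like([H | T], Acc) ->
        merge_like(T, [H | Acc]);            %% consing reverses the input
    merge_like([], Acc) ->
        {ok, lists:reverse(Acc)}.            %% restore the original order
    %% merge_like([a, b, c], []) -> {ok, [a, b, c]};
    %% without the reverse it would be {ok, [c, b, a]}.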


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/b72ae144
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/b72ae144
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/b72ae144

Branch: refs/heads/windsor-merge
Commit: b72ae1446ef1f40aaef925d50f358cca90834dc9
Parents: 14c384f
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Aug 11 12:29:56 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_att.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/b72ae144/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index 347a8fd..e174c48 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -273,7 +273,7 @@ merge_stubs([Att | Rest], OnDisk, Merged) ->
             merge_stubs(Rest, OnDisk, [Att | Merged])
     end;
 merge_stubs([], _, Merged) ->
-    {ok, Merged}.
+    {ok, lists:reverse(Merged)}.
 
 
 disk_info(_, []) ->


[18/26] couch commit: updated refs/heads/windsor-merge to 14dc5e9

Posted by rn...@apache.org.
Remove upgrade code that was re-introduced during the merge


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/7da0801d
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/7da0801d
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/7da0801d

Branch: refs/heads/windsor-merge
Commit: 7da0801d9e6f5eb8d859d8bb0b8b7321cbef72ac
Parents: 296b98a
Author: Paul J. Davis <pa...@gmail.com>
Authored: Sat Aug 16 21:27:21 2014 -0500
Committer: Robert Newson <rn...@apache.org>
Committed: Tue Aug 26 10:44:12 2014 +0100

----------------------------------------------------------------------
 src/couch_att.erl | 4 ----
 1 file changed, 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/7da0801d/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index f0d2dd1..232bb60 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -579,10 +579,6 @@ foldl(Att, Fun, Acc) ->
 
 foldl(Bin, _Att, Fun, Acc) when is_binary(Bin) ->
     Fun(Bin, Acc);
-foldl({Fd, Sp}, Att, Fun, Acc) when is_tuple(Sp) orelse Sp == null ->
-    % 09 UPGRADE CODE
-    Len = fetch(att_len, Att),
-    couch_stream:old_foldl(Fd, Sp, Len, Fun, Acc);
 foldl({Fd, Sp}, Att, Fun, Acc) ->
     Md5 = fetch(md5, Att),
     couch_stream:foldl(Fd, Sp, Md5, Fun, Acc);