You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by jc...@apache.org on 2009/09/17 00:04:19 UTC

svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Author: jchris
Date: Wed Sep 16 22:04:18 2009
New Revision: 815984

URL: http://svn.apache.org/viewvc?rev=815984&view=rev
Log:
include_docs now takes an _id (as well as a _rev) in the emitted value, to load docs other than the one doing the emitting. This means you can have one doc list a set of other docs to load in a single query. Enjoy!

Modified:
    couchdb/trunk/share/www/script/test/view_include_docs.js
    couchdb/trunk/src/couchdb/couch_httpd_db.erl
    couchdb/trunk/src/couchdb/couch_httpd_view.erl

Modified: couchdb/trunk/share/www/script/test/view_include_docs.js
URL: http://svn.apache.org/viewvc/couchdb/trunk/share/www/script/test/view_include_docs.js?rev=815984&r1=815983&r2=815984&view=diff
==============================================================================
--- couchdb/trunk/share/www/script/test/view_include_docs.js (original)
+++ couchdb/trunk/share/www/script/test/view_include_docs.js Wed Sep 16 22:04:18 2009
@@ -29,6 +29,9 @@
       with_prev: {
         map: "function(doc){if(doc.prev) emit(doc._id,{'_rev':doc.prev}); else emit(doc._id,{'_rev':doc._rev});}"
       },
+      with_id: {
+        map: "function(doc) {if(doc.link_id) { var value = {'_id':doc.link_id}; if (doc.link_rev) {value._rev = doc.link_rev}; emit(doc._id, value);}};"
+      },
       summate: {
         map:"function (doc) {emit(doc.integer, doc.integer)};",
         reduce:"function (keys, values) { return sum(values); };"
@@ -84,6 +87,21 @@
   T(resp.rows.length == 1);
   T(resp.rows[0].value == 4950);
 
+  T(db.save({
+    "_id": "link-to-10",
+    "link_id" : "10"
+  }).ok);
+  
+  // you can link to another doc from a value.
+  resp = db.view("test/with_id", {key:"link-to-10"});
+  T(resp.rows[0].key == "link-to-10");
+  T(resp.rows[0].value["_id"] == "10");
+  
+  resp = db.view("test/with_id", {key:"link-to-10",include_docs: true});
+  T(resp.rows[0].key == "link-to-10");
+  T(resp.rows[0].value["_id"] == "10");
+  T(resp.rows[0].doc._id == "10");
+
   // Check emitted _rev controls things
   resp = db.allDocs({include_docs: true}, ["0"]);
   var before = resp.rows[0].doc;
@@ -91,11 +109,13 @@
   var after = db.open("0");
   after.integer = 100;
   after.prev = after._rev;
-  T(db.save(after).ok);
+  resp = db.save(after)
+  T(resp.ok);
   
   var after = db.open("0");
-  T(after._rev != after.prev);
-  T(after.integer == 100);
+  TEquals(resp.rev, after._rev, "fails with firebug running");
+  T(after._rev != after.prev, "passes");
+  TEquals(100, after.integer, "fails with firebug running");
 
   // should emit the previous revision
   resp = db.view("test/with_prev", {include_docs: true}, ["0"]);

Modified: couchdb/trunk/src/couchdb/couch_httpd_db.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_httpd_db.erl?rev=815984&r1=815983&r2=815984&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_httpd_db.erl (original)
+++ couchdb/trunk/src/couchdb/couch_httpd_db.erl Wed Sep 16 22:04:18 2009
@@ -192,7 +192,7 @@
 
 changes_row(Db, Seq, Id, Del, Results, Rev, true) ->
     {[{seq,Seq},{id,Id},{changes,Results}] ++ deleted_item(Del) ++
-        couch_httpd_view:doc_member(Db, Id, Rev)};
+        couch_httpd_view:doc_member(Db, {Id, Rev})};
 changes_row(_, Seq, Id, Del, Results, _, false) ->
     {[{seq,Seq},{id,Id},{changes,Results}] ++ deleted_item(Del)}.
 

Modified: couchdb/trunk/src/couchdb/couch_httpd_view.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_httpd_view.erl?rev=815984&r1=815983&r2=815984&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_httpd_view.erl (original)
+++ couchdb/trunk/src/couchdb/couch_httpd_view.erl Wed Sep 16 22:04:18 2009
@@ -18,7 +18,7 @@
 -export([get_stale_type/1, get_reduce_type/1, parse_view_params/3]).
 -export([make_view_fold_fun/6, finish_view_fold/4, view_row_obj/3]).
 -export([view_group_etag/2, view_group_etag/3, make_reduce_fold_funs/5]).
--export([design_doc_view/5, parse_bool_param/1, doc_member/3]).
+-export([design_doc_view/5, parse_bool_param/1, doc_member/2]).
 -export([make_key_options/1]).
 
 -import(couch_httpd,
@@ -589,17 +589,18 @@
     Rev0 ->
         couch_doc:parse_rev(Rev0)
     end,
-    view_row_with_doc(Db, {{Key, DocId}, {Props}}, Rev);
+    IncludeId = proplists:get_value(<<"_id">>, Props, DocId),
+    view_row_with_doc(Db, {{Key, DocId}, {Props}}, {IncludeId, Rev});
 view_row_obj(Db, {{Key, DocId}, Value}, true) ->
-    view_row_with_doc(Db, {{Key, DocId}, Value}, nil);
+    view_row_with_doc(Db, {{Key, DocId}, Value}, {DocId, nil});
 % the normal case for rendering a view row
 view_row_obj(_Db, {{Key, DocId}, Value}, _IncludeDocs) ->
     {[{id, DocId}, {key, Key}, {value, Value}]}.
 
-view_row_with_doc(Db, {{Key, DocId}, Value}, Rev) ->
-    {[{id, DocId}, {key, Key}, {value, Value}] ++ doc_member(Db, DocId, Rev)}.
+view_row_with_doc(Db, {{Key, DocId}, Value}, IdRev) ->
+    {[{id, DocId}, {key, Key}, {value, Value}] ++ doc_member(Db, IdRev)}.
 
-doc_member(Db, DocId, Rev) ->
+doc_member(Db, {DocId, Rev}) ->
     ?LOG_DEBUG("Include Doc: ~p ~p", [DocId, Rev]),
     case (catch couch_httpd_db:couch_doc_open(Db, DocId, Rev, [])) of
         #doc{} = Doc ->



Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Chris Anderson <jc...@apache.org>.
On Thu, Sep 17, 2009 at 3:45 PM, Delta 2038 <de...@gmail.com> wrote:
> On Wed, Sep 16, 2009 at 6:42 PM, Chris Anderson <jc...@apache.org> wrote:
>
>> Of course, the usual include_docs performance caveats apply, but this
>> time we get a feature out of it!
>>
>
> A bit off-topic here, but would you elaborate maybe just a little bit the
> include_docs performance caveats? I assume that it involves reading another
> view to grab the documents since they are not present in the current view?
> We are interested in learning how much of a performance hit it actually
> incurs. We could set up some load tests ourselves to find out, but if you
> already have some pretty good idea... :)
>

Include docs, whether fetching a specified doc, or just the doc that
emitted the row, has a per-row cost of pulling documents from the
database. So it could incur additional disk seeks - this shouldn't
matter unless you are just on the edge of being able to handle the
load you are experiencing.

Generally, views are awesome because they can be streamed from disk
with a minimum of seeks. Include docs doesn't have this bit of
awesome. I'm not sure what numbers you'd see, but it would probably
depend a lot on hardware, load, and configuration (putting views on
their own spindle could help mitigate this a fair amount.)



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Paul Davis <pa...@gmail.com>.
On Thu, Sep 17, 2009 at 6:45 PM, Delta 2038 <de...@gmail.com> wrote:
> On Wed, Sep 16, 2009 at 6:42 PM, Chris Anderson <jc...@apache.org> wrote:
>
>> Of course, the usual include_docs performance caveats apply, but this
>> time we get a feature out of it!
>>
>
> A bit off-topic here, but would you elaborate maybe just a little bit the
> include_docs performance caveats? I assume that it involves reading another
> view to grab the documents since they are not present in the current view?
> We are interested in learning how much of a performance hit it actually
> incurs. We could set up some load tests ourselves to find out, but if you
> already have some pretty good idea... :)
>

You've basically got it. In a nutshell, when using ?include_docs you
have to read from a different location on disk and navigate the
document id btree. The alternative is to emit the doc as part of the
value so that reads are faster (at the expense of slower builds).

I haven't seen any concrete numbers on the tradeoffs. Generally
speaking if you're only wanting part of a doc, it's probably best to
emit just that part. Granted if that part is most of the doc, and it's
a big doc, it could cause a noticeable impact on view builds.

I'd be interested in any numbers you generate exploring the difference.

Paul Davis

Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Delta 2038 <de...@gmail.com>.
On Wed, Sep 16, 2009 at 6:42 PM, Chris Anderson <jc...@apache.org> wrote:

> Of course, the usual include_docs performance caveats apply, but this
> time we get a feature out of it!
>

A bit off-topic here, but would you elaborate maybe just a little bit on the
include_docs performance caveats? I assume that it involves reading another
view to grab the documents since they are not present in the current view?
We are interested in learning how much of a performance hit it actually
incurs. We could set up some load tests ourselves to find out, but if you
already have some pretty good idea... :)

Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Curt Arnold <ca...@apache.org>.
On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>
> I don't think I see how this would affect a partitioning scheme. I
> guess it could make people more willing to split documents that might
> be better served as a single doc, but that'd be the same regardless
> with features like multi-key fetch.

Actually, it is the other way around.  The wiki proposal effectively  
uses docid as an entity identifier and would provide advantages to  
designs where an entity and doc are synonymous.  The patch can  
accomplish the same end result, but it doesn't add magic to the JSON.

>
>> This appears to be better, but it seems to be assigning some magic  
>> behavior
>> to the second argument so it specifies both the value and the included document.  Was there
>> previous
>> discussion or a bug report filed on this that I missed?
>
> There wasn't a ticket, but the new bit is only a minor tweak to a
> lesser-known feature. Using the _rev member of an emitted object comes
> from the ?include_docs=true feature. When I wrote that I fretted over
> the race condition of pulling a doc revision that was different than
> the one that emitted the row. The best answer at the time was "allow
> people to specify a _rev" which was all well and good.
>
> The new bit just extends that slightly.
>
>> I'm thinking it would be cleaner to support an optional third argument
>> to emit
>> with { _id:"", _rev:"" }?  The current two argument emit() would be  
>> the
>> equivalent of emit(key, value, { _id:doc._id, _rev:doc._rev}).
>>
>
> I think this is a pretty good idea. Though unless I'm missing
> something the implementation difficulty rises noticeably. The only
> initial drawback I see is how we explain the semantics of default
> behavior. For ?include_docs=true it was simply "current version or
> version specified by _rev". Obviously adding _id makes that weirder,
> but I don't see a clearer explanation with the third parameter
> version.
>
> Paul Davis

In the current code, I don't see how you would emit a string, array or  
other value and also affect the included documents.  If added as a  
third parameter, we could also dispense with using the underscores in  
the argument, so it would really be:

emit(key, value, { id:doc._id, rev:doc._rev});

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Chris Anderson <jc...@apache.org>.
On Sun, Oct 25, 2009 at 9:24 PM, Nathan Stott <nr...@gmail.com> wrote:
> Sorry for dragging up an old discussion, but I'm curious as to if this
> feature is in 0.10 and if there's documentation about it.  I did some
> searching and could not locate any.

This didn't make the cut for 0.10. The closest documentation is the
test portion of the patch:

http://svn.apache.org/viewvc?view=revision&revision=815984

>
> On Thu, Sep 17, 2009 at 10:44 PM, Chris Anderson <jc...@apache.org> wrote:
>
>> On Thu, Sep 17, 2009 at 4:33 PM, Paul Davis <pa...@gmail.com>
>> wrote:
>> >> I guess what I'm saying is that I think the include doc "pointer"
>> >> belongs in the value, not in some other place. It strikes me as
>> >> exactly what emitted values are for, to hold arbitrary data associated
>> >> with the key.
>> >
>> > By assigning implicit behavior based on the value, its no longer
>> > arbitrary. _rev and now _id are restricted in what they can represent.
>> > For instance, what happens if I emit {"_id": true} with
>> > include_docs=true?
>> >
>> > Think of the third value as a "row options" variable. The two concepts
>> > are basically "We make assumptions about what you wanted based on what
>> > you emit" or "We make no assumptions. You must be precise in what you
>> > want". Being precise is important because it keeps the concept-API
>> > simpler, easier to remember, and easier to reason about.
>> >
>> > It may seem trivial at this point, but what if we add a feature for
>> > following keys instead of id's? And then what if we allow a row to
>> > stop traversal in a breadth first search scheme? Putting these into an
>> > "options variable" makes more sense to me because the concept is that
>> > they affect how the row is interpreted by the server vs what the row
>> > represents to the client.
>> >
>> > Granted that's just the purist argument and it doesn't really mean
>> > anything until there's an implementation. So until someone gives it a
>> > go and puts a patch in JIRA there's no reason to change the current
>> > behavior. I just don't want any future contributors to think this
>> > isn't an idea worth pursuing.
>> >
>> > Paul Davis
>> >
>>
>> I can see the argument now that you describe future features we might
>> build in this manner. However, I still think it's generally OK to muck
>> around in the value namespace, and I wouldn't be opposed to reserving
>> the _ namespace in view values (but I don't think it's necessary).
>>
>> We don't need to be very formal here. If someone is writing a view to
>> take advantage of include_docs special features (or key following, or
>> other future features) they will be able to write their view around
>> the feature implementation.
>>
>> So for hypothetical future instance, if you absolutely must have a
>> view with row values that look like {"_stop":true}, and you don't want
>> to trigger the stop-iteration, then simply don't query the view with
>> ?stop_on_stop=true. If you plan to use the (hypothetical) stop
>> iterator feature, then it's up to you to only have "_stop" : true in
>> your value when you mean it. I don't think this is a big deal, as
>> you'll be writing the map functions around these features anyway, so
>> you can always envelope anything: eg {"my_real_data":{"_stop":true},
>> "_stop" : false}.
>>
>> The bigger question is philosophical. I generally tend to recoil from
>> the purist argument, because once you start to go down that road you
>> face a danger of lots of implementation (and API) complexity for very
>> little practical benefit. I'm not saying I'd vote against a patch
>> here, but I think energy could be put to better use.
>>
>> Chris
>>
>>
>> --
>> Chris Anderson
>> http://jchrisa.net
>> http://couch.io
>>
>



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Nathan Stott <nr...@gmail.com>.
Sorry for dragging up an old discussion, but I'm curious as to whether this
feature is in 0.10 and whether there's documentation about it.  I did some
searching and could not locate any.

On Thu, Sep 17, 2009 at 10:44 PM, Chris Anderson <jc...@apache.org> wrote:

> On Thu, Sep 17, 2009 at 4:33 PM, Paul Davis <pa...@gmail.com>
> wrote:
> >> I guess what I'm saying is that I think the include doc "pointer"
> >> belongs in the value, not in some other place. It strikes me as
> >> exactly what emitted values are for, to hold arbitrary data associated
> >> with the key.
> >
> > By assigning implicit behavior based on the value, its no longer
> > arbitrary. _rev and now _id are restricted in what they can represent.
> > For instance, what happens if I emit {"_id": true} with
> > include_docs=true?
> >
> > Think of the third value as a "row options" variable. The two concepts
> > are basically "We make assumptions about what you wanted based on what
> > you emit" or "We make no assumptions. You must be precise in what you
> > want". Being precise is important because it keeps the concept-API
> > simpler, easier to remember, and easier to reason about.
> >
> > It may seem trivial at this point, but what if we add a feature for
> > following keys instead of id's? And then what if we allow a row to
> > stop traversal in a breadth first search scheme? Putting these into an
> > "options variable" makes more sense to me because the concept is that
> > they affect how the row is interpreted by the server vs what the row
> > represents to the client.
> >
> > Granted that's just the purist argument and it doesn't really mean
> > anything until there's an implementation. So until someone gives it a
> > go and puts a patch in JIRA there's no reason to change the current
> > behavior. I just don't want any future contributors to think this
> > isn't an idea worth pursuing.
> >
> > Paul Davis
> >
>
> I can see the argument now that you describe future features we might
> build in this manner. However, I still think it's generally OK to muck
> around in the value namespace, and I wouldn't be opposed to reserving
> the _ namespace in view values (but I don't think it's necessary).
>
> We don't need to be very formal here. If someone is writing a view to
> take advantage of include_docs special features (or key following, or
> other future features) they will be able to write their view around
> the feature implementation.
>
> So for hypothetical future instance, if you absolutely must have a
> view with row values that look like {"_stop":true}, and you don't want
> to trigger the stop-iteration, then simply don't query the view with
> ?stop_on_stop=true. If you plan to use the (hypothetical) stop
> iterator feature, then it's up to you to only have "_stop" : true in
> your value when you mean it. I don't think this is a big deal, as
> you'll be writing the map functions around these features anyway, so
> you can always envelope anything: eg {"my_real_data":{"_stop":true},
> "_stop" : false}.
>
> The bigger question is philosophical. I generally tend to recoil from
> the purist argument, because once you start to go down that road you
> face a danger of lots of implementation (and API) complexity for very
> little practical benefit. I'm not saying I'd vote against a patch
> here, but I think energy could be put to better use.
>
> Chris
>
>
> --
> Chris Anderson
> http://jchrisa.net
> http://couch.io
>

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Chris Anderson <jc...@apache.org>.
On Thu, Sep 17, 2009 at 4:33 PM, Paul Davis <pa...@gmail.com> wrote:
>> I guess what I'm saying is that I think the include doc "pointer"
>> belongs in the value, not in some other place. It strikes me as
>> exactly what emitted values are for, to hold arbitrary data associated
>> with the key.
>
> By assigning implicit behavior based on the value, its no longer
> arbitrary. _rev and now _id are restricted in what they can represent.
> For instance, what happens if I emit {"_id": true} with
> include_docs=true?
>
> Think of the third value as a "row options" variable. The two concepts
> are basically "We make assumptions about what you wanted based on what
> you emit" or "We make no assumptions. You must be precise in what you
> want". Being precise is important because it keeps the concept-API
> simpler, easier to remember, and easier to reason about.
>
> It may seem trivial at this point, but what if we add a feature for
> following keys instead of id's? And then what if we allow a row to
> stop traversal in a breadth first search scheme? Putting these into an
> "options variable" makes more sense to me because the concept is that
> they affect how the row is interpreted by the server vs what the row
> represents to the client.
>
> Granted that's just the purist argument and it doesn't really mean
> anything until there's an implementation. So until someone gives it a
> go and puts a patch in JIRA there's no reason to change the current
> behavior. I just don't want any future contributors to think this
> isn't an idea worth pursuing.
>
> Paul Davis
>

I can see the argument now that you describe future features we might
build in this manner. However, I still think it's generally OK to muck
around in the value namespace, and I wouldn't be opposed to reserving
the _ namespace in view values (but I don't think it's necessary).

We don't need to be very formal here. If someone is writing a view to
take advantage of include_docs special features (or key following, or
other future features) they will be able to write their view around
the feature implementation.

So for hypothetical future instance, if you absolutely must have a
view with row values that look like {"_stop":true}, and you don't want
to trigger the stop-iteration, then simply don't query the view with
?stop_on_stop=true. If you plan to use the (hypothetical) stop
iterator feature, then it's up to you to only have "_stop" : true in
your value when you mean it. I don't think this is a big deal, as
you'll be writing the map functions around these features anyway, so
you can always envelope anything: eg {"my_real_data":{"_stop":true},
"_stop" : false}.

The bigger question is philosophical. I generally tend to recoil from
the purist argument, because once you start to go down that road you
face a danger of lots of implementation (and API) complexity for very
little practical benefit. I'm not saying I'd vote against a patch
here, but I think energy could be put to better use.

Chris


-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Paul Davis <pa...@gmail.com>.
> I guess what I'm saying is that I think the include doc "pointer"
> belongs in the value, not in some other place. It strikes me as
> exactly what emitted values are for, to hold arbitrary data associated
> with the key.

By assigning implicit behavior based on the value, it's no longer
arbitrary. _rev and now _id are restricted in what they can represent.
For instance, what happens if I emit {"_id": true} with
include_docs=true?

Think of the third value as a "row options" variable. The two concepts
are basically "We make assumptions about what you wanted based on what
you emit" or "We make no assumptions. You must be precise in what you
want". Being precise is important because it keeps the concept-API
simpler, easier to remember, and easier to reason about.

It may seem trivial at this point, but what if we add a feature for
following keys instead of id's? And then what if we allow a row to
stop traversal in a breadth first search scheme? Putting these into an
"options variable" makes more sense to me because the concept is that
they affect how the row is interpreted by the server vs what the row
represents to the client.

Granted that's just the purist argument and it doesn't really mean
anything until there's an implementation. So until someone gives it a
go and puts a patch in JIRA there's no reason to change the current
behavior. I just don't want any future contributors to think this
isn't an idea worth pursuing.

Paul Davis

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Chris Anderson <jc...@apache.org>.
On Thu, Sep 17, 2009 at 3:57 PM, Paul Davis <pa...@gmail.com> wrote:
> On Thu, Sep 17, 2009 at 6:54 PM, Chris Anderson <jc...@apache.org> wrote:
>> On Thu, Sep 17, 2009 at 9:08 AM, Paul Davis <pa...@gmail.com> wrote:
>>> On Thu, Sep 17, 2009 at 11:58 AM, Chris Anderson <jc...@apache.org> wrote:
>>>> On Thu, Sep 17, 2009 at 7:11 AM, Curt Arnold <ca...@apache.org> wrote:
>>>>>
>>>>> On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>>>>>>>>
>>>>>>>> In Governator voice: "It's not a JOIN."
>>>>>>>>
>>>>>>>> But you can use it if you have a doc like:
>>>>>>>>
>>>>>>>> {"_id":"my-outline",
>>>>>>>> "other_docs":["docid","other-docid"]
>>>>>>>> }
>>>>>>>>
>>>>>>>> and then a view like
>>>>>>>>
>>>>>>>> function(doc) {
>>>>>>>> for (var i=0; i < doc.other_docs.length; i++) {
>>>>>>>>  emit([doc._id, i], {"_id" : doc.other_docs[i]});
>>>>>>>> };
>>>>>>>> }
>>>>>
>>>>>
>>>>> So if I do,
>>>>>
>>>>> emit(key, doc)
>>>>>
>>>>> am I specifying the key value and overriding the default document to be
>>>>> included since the value would contain _id and _rev members?  They'd have
>>>>> the same value, so it wouldn't be detectable, but still no clear boundary
>>>>> between the value parts of the parameter and the document part of the
>>>>> parameter.
>>>>>
>>>>>
>>>>>>
>>>>>>> I'm thinking it would be cleaner to support an optional third argument to
>>>>>>> emit
>>>>>>> with { _id:"", _rev:"" }?  The current two argument emit() would be the
>>>>>>> equivalent of emit(key, value, { _id:doc._id, _rev:doc._rev}).
>>>>>>>
>>>>>>
>>>>>> I think this is a pretty good idea. Though unless I'm missing
>>>>>> something the implementation difficulty rises noticeably. The only
>>>>>> initial drawback I see is how we explain the semantics of default
>>>>>> behavior. For ?include_docs=true it was simply "current version or
>>>>>> version specified by _rev". Obviously adding _id makes that weirder,
>>>>>> but I don't see a clearer explanation with the third parameter
>>>>>> version.
>>>>>>
>>>>>> Paul Davis
>>>>>
>>>>>
>>>>> A three parameter version would also allow something like:
>>>>>
>>>>> emit(key, value, null);
>>>>>
>>>>> when you intentionally do not want any documents provided if the user
>>>>> specifies include_docs=true.
>>>>>
>>>>>
>>>>
>>>> I don't see a use case for the 3 parameter version.
>>>>
>>>
>>> I think it's more about having a clean API. Using members on the value
>>> does confuse the semantics quite a bit. Pragmatically though, it
>>> wouldn't really change the end result.
>>>
>>>> If you are writing a view to take advantage of the linking feature,
>>>> you would know that you are working to specify the linked doc _id and
>>>> perhaps _rev in the value.
>>>>
>>>> If you aren't planning on using include docs, or just want the normal
>>>> version of the feature, (include the doc that emitted) then you can
>>>> just ignore the whole thing.
>>>>
>>>> Generally I'm leaning strongly toward keep-it-super-simple, as
>>>> anything that a user wants to do is possible with the API as it
>>>> exists. (If you want to emit a full doc but link to another, you can
>>>> stick the emitted doc in an envelope.)
>>>>
>>>> Chris
>>>>
>>>>
>>>>
>>>> --
>>>> Chris Anderson
>>>> http://jchrisa.net
>>>> http://couch.io
>>>>
>>>
>>> While the third parameter option is definitely cleaner in terms of API
>>> design, I would have to agree that the cost in terms of added
>>> implementation complexity is a bit forbidding. For the moment without
>>> attempting the implementation I'd leave it as is, but would be in
>>> favor of the three parameter version if someone provides a patch.
>>>
>>> Specifically, the added complexity is going to come from how the third
>>> parameter is stored in the btree, how it affects sorting semantics,
>>> and how it relates to reductions. And that's just off the top of my
>>> head without trying to write the code.
>>>
>>
>> Yeah let's just punt on that, I don't see any value there. If people
>> are getting concerned about semantics I'd be fine suggesting we change
>> the whole thing to be emit(key, {"foo":"bar", "_include_doc" : {"_id"
>> : "X", "_rev":"1-abc"}}) but that just seems gratuitous and
>> overthought.
>>
>
> There's plenty of value to adding it. Its just that there's also
> plenty of complexity. I may or may not take a crack at implementing
> it, but for now I agree that its not a priority.

I guess what I'm saying is that I think the include doc "pointer"
belongs in the value, not in some other place. It strikes me as
exactly what emitted values are for, to hold arbitrary data associated
with the key.

Chris

>
>>> Paul Davis
>>>
>>
>>
>>
>> --
>> Chris Anderson
>> http://jchrisa.net
>> http://couch.io
>>
>



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Paul Davis <pa...@gmail.com>.
On Thu, Sep 17, 2009 at 6:54 PM, Chris Anderson <jc...@apache.org> wrote:
> On Thu, Sep 17, 2009 at 9:08 AM, Paul Davis <pa...@gmail.com> wrote:
>> On Thu, Sep 17, 2009 at 11:58 AM, Chris Anderson <jc...@apache.org> wrote:
>>> On Thu, Sep 17, 2009 at 7:11 AM, Curt Arnold <ca...@apache.org> wrote:
>>>>
>>>> On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>>>>>>>
>>>>>>> In Governator voice: "It's not a JOIN."
>>>>>>>
>>>>>>> But you can use it if you have a doc like:
>>>>>>>
>>>>>>> {"_id":"my-outline",
>>>>>>> "other_docs":["docid","other-docid"]
>>>>>>> }
>>>>>>>
>>>>>>> and then a view like
>>>>>>>
>>>>>>> function(doc) {
>>>>>>> for (var i=0; i < doc.other_docs.length; i++) {
>>>>>>>  emit([doc._id, i], {"_id" : doc.other_docs[i]});
>>>>>>> };
>>>>>>> }
>>>>
>>>>
>>>> So if I do,
>>>>
>>>> emit(key, doc)
>>>>
>>>> am I specifying the key value and overriding the default document to be
>>>> included since the value would contain _id and _rev members?  They'd have
>>>> the same value, so it wouldn't be detectable, but still no clear boundary
>>>> between the value parts of the parameter and the document part of the
>>>> parameter.
>>>>
>>>>
>>>>>
>>>>>> I'm thinking it would be cleaner to support an optional third argument to
>>>>>> emit
>>>>>> with { _id:"", _rev:"" }?  The current two argument emit() would be the
>>>>>> equivalent of emit(key, value, { _id:doc._id, _rev:doc._rev}).
>>>>>>
>>>>>
>>>>> I think this is a pretty good idea. Though unless I'm missing
>>>>> something the implementation difficulty rises noticeably. The only
>>>>> initial drawback I see is how we explain the semantics of default
>>>>> behavior. For ?include_docs=true it was simply "current version or
>>>>> version specified by _rev". Obviously adding _id makes that weirder,
>>>>> but I don't see a clearer explanation with the third parameter
>>>>> version.
>>>>>
>>>>> Paul Davis
>>>>
>>>>
>>>> A three parameter version would also allow something like:
>>>>
>>>> emit(key, value, null);
>>>>
>>>> when you intentionally do not want any documents provided if the user
>>>> specifies include_docs=true.
>>>>
>>>>
>>>
>>> I don't see a use case for the 3 parameter version.
>>>
>>
>> I think it's more about having a clean API. Using members on the value
>> does confuse the semantics quite a bit. Pragmatically though, it
>> wouldn't really change the end result.
>>
>>> If you are writing a view to take advantage of the linking feature,
>>> you would know that you are working to specify the linked doc _id and
>>> perhaps _rev in the value.
>>>
>>> If you aren't planning on using include docs, or just want the normal
>>> version of the feature, (include the doc that emitted) then you can
>>> just ignore the whole thing.
>>>
>>> Generally I'm leaning strongly toward keep-it-super-simple, as
>>> anything that a user wants to do is possible with the API as it
>>> exists. (If you want to emit a full doc but link to another, you can
>>> stick the emitted doc in an envelope.)
>>>
>>> Chris
>>>
>>>
>>>
>>> --
>>> Chris Anderson
>>> http://jchrisa.net
>>> http://couch.io
>>>
>>
>> While the third parameter option is definitely cleaner in terms of API
>> design, I would have to agree that the cost in terms of added
>> implementation complexity is a bit forbidding. For the moment without
>> attempting the implementation I'd leave it as is, but would be in
>> favor of the three parameter version if someone provides a patch.
>>
>> Specifically, the added complexity is going to come from how the third
>> parameter is stored in the btree, how it affects sorting semantics,
>> and how it relates to reductions. And that's just off the top of my
>> head without trying to write the code.
>>
>
> Yeah let's just punt on that, I don't see any value there. If people
> are getting concerned about semantics I'd be fine suggesting we change
> the whole thing to be emit(key, {"foo":"bar", "_include_doc" : {"_id"
> : "X", "_rev":"1-abc"}}) but that just seems gratuitous and
> overthought.
>

There's plenty of value to adding it. It's just that there's also
plenty of complexity. I may or may not take a crack at implementing
it, but for now I agree that it's not a priority.

>> Paul Davis
>>
>
>
>
> --
> Chris Anderson
> http://jchrisa.net
> http://couch.io
>

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Chris Anderson <jc...@apache.org>.
On Thu, Sep 17, 2009 at 9:08 AM, Paul Davis <pa...@gmail.com> wrote:
> On Thu, Sep 17, 2009 at 11:58 AM, Chris Anderson <jc...@apache.org> wrote:
>> On Thu, Sep 17, 2009 at 7:11 AM, Curt Arnold <ca...@apache.org> wrote:
>>>
>>> On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>>>>>>
>>>>>> In Governator voice: "It's not a JOIN."
>>>>>>
>>>>>> But you can use it if you have a doc like:
>>>>>>
>>>>>> {"_id":"my-outline",
>>>>>> "other_docs":["docid,"other-docid"]
>>>>>> }
>>>>>>
>>>>>> and then a view like
>>>>>>
>>>>>> function(doc) {
>>>>>> for (var i=0; i < doc.other_docs.length; i++) {
>>>>>>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
>>>>>> };
>>>>>> }
>>>
>>>
>>> So if I do,
>>>
>>> emit(key, doc)
>>>
>>> am I specifying the key value and overriding the default document to be
>>> included since the value would contain _id and _rev members?  They'd have
>>> the same value, so it wouldn't be detectable, but still no clear boundary
>>> between the value parts of the parameter and the document part of the
>>> parameter.
>>>
>>>
>>>>
>>>>> I'm thinking it would be cleaner to support an optional 3 argument to
>>>>> emit
>>>>> with { _id:"", _rev:"" }?  The current two argument emit() would be the
>>>>> equivalent of emit(key, value, { _id:doc._id, _rev:doc.rev}).
>>>>>
>>>>
>>>> I think this is a pretty good idea. Though unless I'm missing
>>>> something the implementation difficulty rises noticeably. The only
>>>> initial drawback I see is how we explain the semantics of default
>>>> behavior. For ?include_docs=true it was simply "current version or
>>>> version specified by _rev". Obviously adding _id makes that weirder,
>>>> but I'm don't see a more clear explanation with the third parameter
>>>> version.
>>>>
>>>> Paul Davis
>>>
>>>
>>> A three parameter version would also allow like:
>>>
>>> emit(key, value, null);
>>>
>>> when you intentionally do not want any documents provided if the user
>>> specifies include_docs=true.
>>>
>>>
>>
>> I don't see a use case for the 3 parameter version.
>>
>
> I think its more about having a clean API. Using members on the value
> does confuse the semantics quite a bit. Pragmatically though, it
> wouldn't really change the end result.
>
>> If you are writing a view to take advantage of the linking feature,
>> you would know that you are working to specify the linked doc _id and
>> perhaps _rev in the value.
>>
>> If you aren't planning on using include docs, or just want the normal
>> version of the feature, (include the doc that emitted) then you can
>> just ignore the whole thing.
>>
>> Generally I'm leaning strongly toward keep-it-super-simple, as
>> anything that a user wants to do is possible with the API as it
>> exists. (If you want to emit a full doc but link to another, you can
>> stick the emitted doc in an envelope.)
>>
>> Chris
>>
>>
>>
>> --
>> Chris Anderson
>> http://jchrisa.net
>> http://couch.io
>>
>
> While the third parameter option is definitely cleaner in terms of API
> design, I would have to agree that the cost in terms of added
> implementation complexity is a bit forbidding. For the moment without
> attempting the implementation I'd leave it as is, but would be in
> favor of the three parameter version if someone provides a patch.
>
> Specifically, the added complexity is going to come from how the third
> parameter is stored in the btree, how it affects sorting semantics,
> and how it relates to reductions. And that's just off the top of my
> head without trying to write the code.
>

Yeah let's just punt on that, I don't see any value there. If people
are getting concerned about semantics I'd be fine suggesting we change
the whole thing to be emit(key, {"foo":"bar", "_include_doc" : {"_id"
: "X", "_rev":"1-abc"}}) but that just seems gratuitous and
overthought.

> Paul Davis
>



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Paul Davis <pa...@gmail.com>.
On Thu, Sep 17, 2009 at 11:58 AM, Chris Anderson <jc...@apache.org> wrote:
> On Thu, Sep 17, 2009 at 7:11 AM, Curt Arnold <ca...@apache.org> wrote:
>>
>> On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>>>>>
>>>>> In Governator voice: "It's not a JOIN."
>>>>>
>>>>> But you can use it if you have a doc like:
>>>>>
>>>>> {"_id":"my-outline",
>>>>> "other_docs":["docid,"other-docid"]
>>>>> }
>>>>>
>>>>> and then a view like
>>>>>
>>>>> function(doc) {
>>>>> for (var i=0; i < doc.other_docs.length; i++) {
>>>>>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
>>>>> };
>>>>> }
>>
>>
>> So if I do,
>>
>> emit(key, doc)
>>
>> am I specifying the key value and overriding the default document to be
>> included since the value would contain _id and _rev members?  They'd have
>> the same value, so it wouldn't be detectable, but still no clear boundary
>> between the value parts of the parameter and the document part of the
>> parameter.
>>
>>
>>>
>>>> I'm thinking it would be cleaner to support an optional 3 argument to
>>>> emit
>>>> with { _id:"", _rev:"" }?  The current two argument emit() would be the
>>>> equivalent of emit(key, value, { _id:doc._id, _rev:doc.rev}).
>>>>
>>>
>>> I think this is a pretty good idea. Though unless I'm missing
>>> something the implementation difficulty rises noticeably. The only
>>> initial drawback I see is how we explain the semantics of default
>>> behavior. For ?include_docs=true it was simply "current version or
>>> version specified by _rev". Obviously adding _id makes that weirder,
>>> but I'm don't see a more clear explanation with the third parameter
>>> version.
>>>
>>> Paul Davis
>>
>>
>> A three parameter version would also allow like:
>>
>> emit(key, value, null);
>>
>> when you intentionally do not want any documents provided if the user
>> specifies include_docs=true.
>>
>>
>
> I don't see a use case for the 3 parameter version.
>

I think it's more about having a clean API. Using members on the value
does confuse the semantics quite a bit. Pragmatically though, it
wouldn't really change the end result.

> If you are writing a view to take advantage of the linking feature,
> you would know that you are working to specify the linked doc _id and
> perhaps _rev in the value.
>
> If you aren't planning on using include docs, or just want the normal
> version of the feature, (include the doc that emitted) then you can
> just ignore the whole thing.
>
> Generally I'm leaning strongly toward keep-it-super-simple, as
> anything that a user wants to do is possible with the API as it
> exists. (If you want to emit a full doc but link to another, you can
> stick the emitted doc in an envelope.)
>
> Chris
>
>
>
> --
> Chris Anderson
> http://jchrisa.net
> http://couch.io
>

While the third parameter option is definitely cleaner in terms of API
design, I would have to agree that the cost in terms of added
implementation complexity is a bit forbidding. For the moment without
attempting the implementation I'd leave it as is, but would be in
favor of the three parameter version if someone provides a patch.

Specifically, the added complexity is going to come from how the third
parameter is stored in the btree, how it affects sorting semantics,
and how it relates to reductions. And that's just off the top of my
head without trying to write the code.

Paul Davis

Re: Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Chris Anderson <jc...@apache.org>.
On Thu, Sep 17, 2009 at 7:11 AM, Curt Arnold <ca...@apache.org> wrote:
>
> On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>>>>
>>>> In Governator voice: "It's not a JOIN."
>>>>
>>>> But you can use it if you have a doc like:
>>>>
>>>> {"_id":"my-outline",
>>>> "other_docs":["docid,"other-docid"]
>>>> }
>>>>
>>>> and then a view like
>>>>
>>>> function(doc) {
>>>> for (var i=0; i < doc.other_docs.length; i++) {
>>>>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
>>>> };
>>>> }
>
>
> So if I do,
>
> emit(key, doc)
>
> am I specifying the key value and overriding the default document to be
> included since the value would contain _id and _rev members?  They'd have
> the same value, so it wouldn't be detectable, but still no clear boundary
> between the value parts of the parameter and the document part of the
> parameter.
>
>
>>
>>> I'm thinking it would be cleaner to support an optional 3 argument to
>>> emit
>>> with { _id:"", _rev:"" }?  The current two argument emit() would be the
>>> equivalent of emit(key, value, { _id:doc._id, _rev:doc.rev}).
>>>
>>
>> I think this is a pretty good idea. Though unless I'm missing
>> something the implementation difficulty rises noticeably. The only
>> initial drawback I see is how we explain the semantics of default
>> behavior. For ?include_docs=true it was simply "current version or
>> version specified by _rev". Obviously adding _id makes that weirder,
>> but I'm don't see a more clear explanation with the third parameter
>> version.
>>
>> Paul Davis
>
>
> A three parameter version would also allow like:
>
> emit(key, value, null);
>
> when you intentionally do not want any documents provided if the user
> specifies include_docs=true.
>
>

I don't see a use case for the 3 parameter version.

If you are writing a view to take advantage of the linking feature,
you would know that you are working to specify the linked doc _id and
perhaps _rev in the value.

If you aren't planning on using include docs, or just want the normal
version of the feature, (include the doc that emitted) then you can
just ignore the whole thing.

Generally I'm leaning strongly toward keep-it-super-simple, as
anything that a user wants to do is possible with the API as it
exists. (If you want to emit a full doc but link to another, you can
stick the emitted doc in an envelope.)

Chris



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Its not a JOIN (was Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl)

Posted by Curt Arnold <ca...@apache.org>.
On Sep 16, 2009, at 11:51 PM, Paul Davis wrote:
>>>
>>> In Governator voice: "It's not a JOIN."
>>>
>>> But you can use it if you have a doc like:
>>>
>>> {"_id":"my-outline",
>>> "other_docs":["docid,"other-docid"]
>>> }
>>>
>>> and then a view like
>>>
>>> function(doc) {
>>> for (var i=0; i < doc.other_docs.length; i++) {
>>>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
>>> };
>>> }


So if I do,

emit(key, doc)

am I specifying the key value and overriding the default document to  
be included since the value would contain _id and _rev members?   
They'd have the same value, so it wouldn't be detectable, but still no  
clear boundary between the value parts of the parameter and the  
document part of the parameter.


>
>> I'm thinking it would be cleaner to support an optional 3 argument  
>> to emit
>> with { _id:"", _rev:"" }?  The current two argument emit() would be  
>> the
>> equivalent of emit(key, value, { _id:doc._id, _rev:doc.rev}).
>>
>
> I think this is a pretty good idea. Though unless I'm missing
> something the implementation difficulty rises noticeably. The only
> initial drawback I see is how we explain the semantics of default
> behavior. For ?include_docs=true it was simply "current version or
> version specified by _rev". Obviously adding _id makes that weirder,
> but I'm don't see a more clear explanation with the third parameter
> version.
>
> Paul Davis


A three parameter version would also allow something like:

emit(key, value, null);

when you intentionally do not want any documents provided if the user  
specifies include_docs=true.


Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Paul Davis <pa...@gmail.com>.
On Wed, Sep 16, 2009 at 10:26 PM, Curt Arnold <cu...@gmail.com> wrote:
>
> On Sep 16, 2009, at 5:42 PM, Chris Anderson wrote:
>
>> On Wed, Sep 16, 2009 at 3:04 PM,  <jc...@apache.org> wrote:
>>>
>>> Author: jchris
>>> Date: Wed Sep 16 22:04:18 2009
>>> New Revision: 815984
>>>
>>> URL: http://svn.apache.org/viewvc?rev=815984&view=rev
>>> Log:
>>> include_docs now take an _id (as well as a _rev) in the emitted value, to
>>> load docs other than the one doing the emitting. This means you can have one
>>> doc list a set of other docs to load in a single query. Enjoy!
>>>
>>
>> In Governator voice: "It's not a JOIN."
>>
>> But you can use it if you have a doc like:
>>
>> {"_id":"my-outline",
>> "other_docs":["docid,"other-docid"]
>> }
>>
>> and then a view like
>>
>> function(doc) {
>> for (var i=0; i < doc.other_docs.length; i++) {
>>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
>> };
>> }
>>
>> and then you will have an ordered list of the other docs available in
>> a view query (with include_docs) You can also specify the other docs
>> _rev if you want -- if it's not available it comes up as null in the
>> feed.
>>
>> Of course, the usual include_docs performance caveats apply, but this
>> time we get a feature out of it!
>>
>> Chris
>>
>
>
> I'm thinking that this may be sufficient to address the use case described
> in http://wiki.apache.org/couchdb/Forward_document_references.  I was uneasy
> about that proposal since it strongly favored using one document per entity
> when that model may not be the appropriate partitioning scheme, but I never
> bothered putting my thoughts down since it looked like it wasn't going any
> where fast.
>

I don't believe I've ever seen that wiki page before. Which is odd
because I watch all wiki edits to try and keep abreast of exactly this
sort of thing.

I would say yes, Chris's patch addresses this almost exactly, though.

I don't think I see how this would affect a partitioning scheme. I
guess it could make people more willing to split documents that might
be better served as a single doc, but that'd be the same regardless
with features like multi-key fetch.

> This appears to be better, but it seems to be assigning some magic behavior
> to the second argument so it specifies both the value.  Was there previous
> discussion or a bug report filed on this that I missed?

There wasn't a ticket, but the new bit is only a minor tweak to a
lesser-known feature. Using the _rev member of an emitted object comes
from the ?include_docs=true feature. When I wrote that I fretted over
the race condition of pulling a doc revision that was different than
the one that emitted the row. The best answer at the time was "allow
people to specify a _rev" which was all well and good.

The new bit just extends that slightly.

> I'm thinking it would be cleaner to support an optional 3 argument to emit
> with { _id:"", _rev:"" }?  The current two argument emit() would be the
> equivalent of emit(key, value, { _id:doc._id, _rev:doc.rev}).
>

I think this is a pretty good idea. Though unless I'm missing
something the implementation difficulty rises noticeably. The only
initial drawback I see is how we explain the semantics of default
behavior. For ?include_docs=true it was simply "current version or
version specified by _rev". Obviously adding _id makes that weirder,
but I don't see a clearer explanation with the third parameter
version.

Paul Davis

Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Curt Arnold <cu...@gmail.com>.
On Sep 16, 2009, at 5:42 PM, Chris Anderson wrote:

> On Wed, Sep 16, 2009 at 3:04 PM,  <jc...@apache.org> wrote:
>> Author: jchris
>> Date: Wed Sep 16 22:04:18 2009
>> New Revision: 815984
>>
>> URL: http://svn.apache.org/viewvc?rev=815984&view=rev
>> Log:
>> include_docs now take an _id (as well as a _rev) in the emitted  
>> value, to load docs other than the one doing the emitting. This  
>> means you can have one doc list a set of other docs to load in a  
>> single query. Enjoy!
>>
>
> In Governator voice: "It's not a JOIN."
>
> But you can use it if you have a doc like:
>
> {"_id":"my-outline",
> "other_docs":["docid,"other-docid"]
> }
>
> and then a view like
>
> function(doc) {
> for (var i=0; i < doc.other_docs.length; i++) {
>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
> };
> }
>
> and then you will have an ordered list of the other docs available in
> a view query (with include_docs) You can also specify the other docs
> _rev if you want -- if it's not available it comes up as null in the
> feed.
>
> Of course, the usual include_docs performance caveats apply, but this
> time we get a feature out of it!
>
> Chris
>


I'm thinking that this may be sufficient to address the use case
described in http://wiki.apache.org/couchdb/Forward_document_references.
I was uneasy about that proposal since it strongly favored using one
document per entity when that model may not be the appropriate
partitioning scheme, but I never bothered putting my thoughts down
since it looked like it wasn't going anywhere fast.

This appears to be better, but it seems to be assigning some magic
behavior to the second argument so it specifies both the value and the
document to include.  Was there previous discussion or a bug report
filed on this that I missed?

I'm thinking it would be cleaner to support an optional third argument
to emit with { _id:"", _rev:"" }?  The current two argument emit() would
be the equivalent of emit(key, value, { _id:doc._id, _rev:doc._rev}).






Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Paul Davis <pa...@gmail.com>.
On Wed, Sep 16, 2009 at 6:42 PM, Chris Anderson <jc...@apache.org> wrote:
> On Wed, Sep 16, 2009 at 3:04 PM,  <jc...@apache.org> wrote:
>> Author: jchris
>> Date: Wed Sep 16 22:04:18 2009
>> New Revision: 815984
>>
>> URL: http://svn.apache.org/viewvc?rev=815984&view=rev
>> Log:
>> include_docs now take an _id (as well as a _rev) in the emitted value, to load docs other than the one doing the emitting. This means you can have one doc list a set of other docs to load in a single query. Enjoy!
>>
>
> In Governator voice: "It's not a JOIN."
>

Good to see you're acclimating to SF. :)

> But you can use it if you have a doc like:
>
> {"_id":"my-outline",
> "other_docs":["docid,"other-docid"]
> }
>
> and then a view like
>
> function(doc) {
> for (var i=0; i < doc.other_docs.length; i++) {
>  emit([doc._id, i], {"_id" : doc.other_docs[i]}
> };
> }
>
> and then you will have an ordered list of the other docs available in
> a view query (with include_docs) You can also specify the other docs
> _rev if you want -- if it's not available it comes up as null in the
> feed.
>
> Of course, the usual include_docs performance caveats apply, but this
> time we get a feature out of it!
>
> Chris
>
> --
> Chris Anderson
> http://jchrisa.net
> http://couch.io
>

I'm definitely keen on this idea, but a thought:

The current patch is basically a limited version of Riak's Jaywalker.
Once I saw that I immediately started contemplating how to add a
similar feature to couch. I got hung up on the fact that Riak uses
more structure in their docs to provide a links field. When Chris
today mentioned emitting a value that had an _id variable I was
hooked.

So while the basic premise is quite awesome, I've got a slightly more
generic idea: instead of using emit(key, {_id: foo}) why not use {_key:
foo} and it looks up rows in the view. It's subtle, but the extension
is that it allows for further chaining. And then to get the actual doc
we recognize ?include_docs=true and then any returned row gets that
doc.

The one downfall of this is that it falls victim to the same
controversy over POST for multi-key fetch, as it's an unbounded-length
value but URLs would be limiting. Right now I'd vote for adding URL
multi-key and then adding the _key follow as either a URL endpoint or
a POST body or url parameter.

Still, way cool feature.

Paul

Re: svn commit: r815984 - in /couchdb/trunk: share/www/script/test/view_include_docs.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_httpd_view.erl

Posted by Chris Anderson <jc...@apache.org>.
On Wed, Sep 16, 2009 at 3:04 PM,  <jc...@apache.org> wrote:
> Author: jchris
> Date: Wed Sep 16 22:04:18 2009
> New Revision: 815984
>
> URL: http://svn.apache.org/viewvc?rev=815984&view=rev
> Log:
> include_docs now take an _id (as well as a _rev) in the emitted value, to load docs other than the one doing the emitting. This means you can have one doc list a set of other docs to load in a single query. Enjoy!
>

In Governator voice: "It's not a JOIN."

But you can use it if you have a doc like:

{"_id":"my-outline",
"other_docs":["docid","other-docid"]
}

and then a view like

function(doc) {
for (var i=0; i < doc.other_docs.length; i++) {
  emit([doc._id, i], {"_id" : doc.other_docs[i]});
};
}

and then you will have an ordered list of the other docs available in
a view query (with include_docs). You can also specify the other doc's
_rev if you want -- if it's not available it comes up as null in the
feed.

Of course, the usual include_docs performance caveats apply, but this
time we get a feature out of it!

Chris

-- 
Chris Anderson
http://jchrisa.net
http://couch.io