You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@couchdb.apache.org by Stefan Kögl <ko...@gmail.com> on 2012/03/18 19:08:42 UTC

{error,emfile} on CouchDB 1.2.x

Hi,

Another thing I noticed during my tests of CouchDB 1.2.x. I redirected
live traffic to the instance and after a rather short time, requests
were failing with the following information in the logs:


[Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
{error_report,<0.31.0>,
                                    {<0.27554.2>,std_error,
                                     [{application,mochiweb},
                                      "Accept failed error",
                                      "{error,emfile}"]}}
[Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
{error_report,<0.31.0>,
                          {<0.27554.2>,crash_report,
                           [[{initial_call,
                                 {mochiweb_acceptor,init,
                                     ['Argument__1','Argument__2',
                                      'Argument__3']}},
                             {pid,<0.27554.2>},
                             {registered_name,[]},
                             {error_info,
                                 {exit,
                                     {error,accept_failed},
                                     [{mochiweb_acceptor,init,3},
                                      {proc_lib,init_p_do_apply,3}]}},
                             {ancestors,
                                 [couch_httpd,couch_secondary_services,
                                  couch_server_sup,<0.32.0>]},
                             {messages,[]},
                             {links,[<0.129.0>]},
                             {dictionary,[]},
                             {trap_exit,false},
                             {status,running},
                             {heap_size,233},
                             {stack_size,24},
                             {reductions,244}],
                            []]}}


I think "emfile" means that CouchDB (or mochiweb?) couldn't open any
more files / connections. I've set the (hard and soft) nofile limit for
user couchdb to 4096, but didn't raise the ERL_MAX_PORTS accordingly.
Anyway, as soon as the error occurred, CouchDB started writing most of my
view files from scratch, rendering the instance unusable.

I'd expect CouchDB to fail more gracefully when the maximum number of
open files is reached. Is this a bug or expected behaviour?


-- Stefan


Re: {error,emfile} on CouchDB 1.2.x

Posted by Robert Newson <rn...@apache.org>.
I'd rather improve the handling in prepare_group/4, I'd hope we could
explicitly enumerate the cases where deleting the view index is a
reasonable response rather than do it quite this capriciously.

B.


On 18 March 2012 20:28, Randall Leeds <ra...@gmail.com> wrote:
> On Sun, Mar 18, 2012 at 11:08, Stefan Kögl <ko...@gmail.com> wrote:
>
>> Hi,
>>
>> Another thing I noticed during my tests of CouchDB 1.2.x. I redirected
>> live traffic to the instance and after a rather short time, requests
>> were failing with the following information in the logs:
>>
>>
>> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
>> {error_report,<0.31.0>,
>>                                    {<0.27554.2>,std_error,
>>                                     [{application,mochiweb},
>>                                      "Accept failed error",
>>                                      "{error,emfile}"]}}
>> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
>> {error_report,<0.31.0>,
>>                          {<0.27554.2>,crash_report,
>>                           [[{initial_call,
>>                                 {mochiweb_acceptor,init,
>>                                     ['Argument__1','Argument__2',
>>                                      'Argument__3']}},
>>                             {pid,<0.27554.2>},
>>                             {registered_name,[]},
>>                             {error_info,
>>                                 {exit,
>>                                     {error,accept_failed},
>>                                     [{mochiweb_acceptor,init,3},
>>                                      {proc_lib,init_p_do_apply,3}]}},
>>                             {ancestors,
>>                                 [couch_httpd,couch_secondary_services,
>>                                  couch_server_sup,<0.32.0>]},
>>                             {messages,[]},
>>                             {links,[<0.129.0>]},
>>                             {dictionary,[]},
>>                             {trap_exit,false},
>>                             {status,running},
>>                             {heap_size,233},
>>                             {stack_size,24},
>>                             {reductions,244}],
>>                            []]}}
>>
>>
>> I think "emfile" means that CouchDB (or mochiweb?) couldn't open any
>> more files / connections. I've set the (hard and soft) nofile limit for
>> user couchdb to 4096, but didn't raise the ERL_MAX_PORTS accordingly.
>> Anyway, as soon as the error occurred, CouchDB started writing most of my
>> view files from scratch, rendering the instance unusable.
>>
>> I'd expect CouchDB to fail more gracefully when the maximum number of
>> open files is reached. Is this a bug or expected behaviour?
>>
>
> Looks like a bug. Whenever there's a problem opening a view file,
> couch_view tries to delete it. Clearly, this is not the right course of
> action when the problem is due to emfile.
>
> Here's a patch that I propose might fix it. I'd like to hear from another
> dev on this, or if there's a better way we should bail out.
>
> diff --git a/src/couchdb/couch_view_group.erl
> b/src/couchdb/couch_view_group.erl
> index 97fc512..ab075bd 100644
> --- a/src/couchdb/couch_view_group.erl
> +++ b/src/couchdb/couch_view_group.erl
> @@ -469,6 +469,10 @@ open_index_file(RootDir, DbName, GroupSig) ->
>     case couch_file:open(FileName) of
>     {ok, Fd}        -> {ok, Fd};
>     {error, enoent} -> couch_file:open(FileName, [create]);
> +    {error, emfile} ->
> +        ?LOG_ERROR("Could not open file for view index: max open files
> reached. "
> +                   "Raise ERL_MAX_PORTS or system limits.", []),
> +        throw({error, emfile});
>     Error           -> Error
>     end.

Re: {error,emfile} on CouchDB 1.2.x

Posted by Dave Cottlehuber <da...@muse.net.nz>.
On 19 March 2012 10:28, Jan Lehnardt <ja...@apache.org> wrote:
>
> On Mar 19, 2012, at 09:31 , Randall Leeds wrote:
>
>> Fixed on 1.2.x and 1.1.x. Need to sleep and take a look at how I want to
>> handle it on master.
>> Thanks again for picking up on this one, Stefan. It's been in there since
>> forever and I'd definitely seen the symptom without knowing the cause.
>
> Thanks Randall and Paul for nailing this one "over night" :)
>
> I ran it through make distcheck and the browser test suite and got all A's.
>
> Back to Noah.
>
> Cheers
> Jan
> --
>


Ditto, LGTME. Roll this puppy!

Re: {error,emfile} on CouchDB 1.2.x

Posted by Jan Lehnardt <ja...@apache.org>.
On Mar 19, 2012, at 09:31 , Randall Leeds wrote:

> Fixed on 1.2.x and 1.1.x. Need to sleep and take a look at how I want to
> handle it on master.
> Thanks again for picking up on this one, Stefan. It's been in there since
> forever and I'd definitely seen the symptom without knowing the cause.

Thanks Randall and Paul for nailing this one "over night" :)

I ran it through make distcheck and the browser test suite and got all A's.

Back to Noah.

Cheers
Jan
-- 


Re: {error,emfile} on CouchDB 1.2.x

Posted by Stefan Kögl <ko...@gmail.com>.
On Mon, Mar 19, 2012 at 9:31 AM, Randall Leeds <ra...@gmail.com> wrote:
> Fixed on 1.2.x and 1.1.x. Need to sleep and take a look at how I want to
> handle it on master.
> Thanks again for picking up on this one, Stefan. It's been in there since
> forever and I'd definitely seen the symptom without knowing the cause.

Thanks for taking care of this so quickly -- looking forward to a
great release :)


-- Stefan

Re: {error,emfile} on CouchDB 1.2.x

Posted by Randall Leeds <ra...@gmail.com>.
Fixed on 1.2.x and 1.1.x. Need to sleep and take a look at how I want to
handle it on master.
Thanks again for picking up on this one, Stefan. It's been in there since
forever and I'd definitely seen the symptom without knowing the cause.

-R

>
>

Re: {error,emfile} on CouchDB 1.2.x

Posted by Jan Lehnardt <ja...@apache.org>.
On Mar 18, 2012, at 21:46 , Randall Leeds wrote:

> On Sun, Mar 18, 2012 at 13:39, Jan Lehnardt <ja...@apache.org> wrote:
> 
>> 
>> On Mar 18, 2012, at 21:28 , Randall Leeds wrote:
>> 
>>> On Sun, Mar 18, 2012 at 11:08, Stefan Kögl <ko...@gmail.com>
>> wrote:
>>> 
>>>> Hi,
>>>> 
>>>> Another thing I noticed during my tests of CouchDB 1.2.x. I redirected
>>>> live traffic to the instance and after a rather short time, requests
>>>> were failing with the following information in the logs:
>>>> 
>>>> 
>>>> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
>>>> {error_report,<0.31.0>,
>>>>                                  {<0.27554.2>,std_error,
>>>>                                   [{application,mochiweb},
>>>>                                    "Accept failed error",
>>>>                                    "{error,emfile}"]}}
>>>> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
>>>> {error_report,<0.31.0>,
>>>>                        {<0.27554.2>,crash_report,
>>>>                         [[{initial_call,
>>>>                               {mochiweb_acceptor,init,
>>>>                                   ['Argument__1','Argument__2',
>>>>                                    'Argument__3']}},
>>>>                           {pid,<0.27554.2>},
>>>>                           {registered_name,[]},
>>>>                           {error_info,
>>>>                               {exit,
>>>>                                   {error,accept_failed},
>>>>                                   [{mochiweb_acceptor,init,3},
>>>>                                    {proc_lib,init_p_do_apply,3}]}},
>>>>                           {ancestors,
>>>>                               [couch_httpd,couch_secondary_services,
>>>>                                couch_server_sup,<0.32.0>]},
>>>>                           {messages,[]},
>>>>                           {links,[<0.129.0>]},
>>>>                           {dictionary,[]},
>>>>                           {trap_exit,false},
>>>>                           {status,running},
>>>>                           {heap_size,233},
>>>>                           {stack_size,24},
>>>>                           {reductions,244}],
>>>>                          []]}}
>>>> 
>>>> 
>>>> I think "emfile" means that CouchDB (or mochiweb?) couldn't open any
>>>> more files / connections. I've set the (hard and soft) nofile limit for
>>>> user couchdb to 4096, but didn't raise the ERL_MAX_PORTS accordingly.
>>>> Anyway, as soon as the error occurred, CouchDB started writing most of my
>>>> view files from scratch, rendering the instance unusable.
>>>> 
>>>> I'd expect CouchDB to fail more gracefully when the maximum number of
>>>> open files is reached. Is this a bug or expected behaviour?
>>>> 
>>> 
>>> Looks like a bug. Whenever there's a problem opening a view file,
>>> couch_view tries to delete it. Clearly, this is not the right course of
>>> action when the problem is due to emfile.
>> 
>> This looks rather serious. I opened a JIRA:
>> 
>> https://issues.apache.org/jira/browse/COUCHDB-1445
>> 
>> And started collecting the info. Bob N's message came in in the meantime
>> and I agree, we should see if there's more cases where we need to be
>> careful.
>> 
>> Also, I'd consider this blocking for 1.2.0.
>> 
>> Anyone who can pitch in with their expertise is more than welcome! :)
>> 
> 
> Assigned to me. Patch forthcoming. Agree it should block 1.2.0, especially
> because upgrades are the sort of things where bad packaging downstream
> might cause custom ERL_MAX_PORTS settings to be overwritten and we wouldn't
> want anyone's production to have its views erased needlessly.

Thanks for taking this on Randall!

Cheers
Jan
-- 

> 
> -Randall
> 
> 
>> 
>> Cheers
>> Jan
>> --
>> 
>> 
>>> 
>>> Here's a patch that I propose might fix it. I'd like to hear from another
>>> dev on this, or if there's a better way we should bail out.
>>> 
>>> diff --git a/src/couchdb/couch_view_group.erl
>>> b/src/couchdb/couch_view_group.erl
>>> index 97fc512..ab075bd 100644
>>> --- a/src/couchdb/couch_view_group.erl
>>> +++ b/src/couchdb/couch_view_group.erl
>>> @@ -469,6 +469,10 @@ open_index_file(RootDir, DbName, GroupSig) ->
>>>    case couch_file:open(FileName) of
>>>    {ok, Fd}        -> {ok, Fd};
>>>    {error, enoent} -> couch_file:open(FileName, [create]);
>>> +    {error, emfile} ->
>>> +        ?LOG_ERROR("Could not open file for view index: max open files
>>> reached. "
>>> +                   "Raise ERL_MAX_PORTS or system limits.", []),
>>> +        throw({error, emfile});
>>>    Error           -> Error
>>>    end.
>> 
>> 


Re: {error,emfile} on CouchDB 1.2.x

Posted by Randall Leeds <ra...@gmail.com>.
On Sun, Mar 18, 2012 at 13:39, Jan Lehnardt <ja...@apache.org> wrote:

>
> On Mar 18, 2012, at 21:28 , Randall Leeds wrote:
>
> > On Sun, Mar 18, 2012 at 11:08, Stefan Kögl <ko...@gmail.com>
> wrote:
> >
> >> Hi,
> >>
> >> Another thing I noticed during my tests of CouchDB 1.2.x. I redirected
> >> live traffic to the instance and after a rather short time, requests
> >> were failing with the following information in the logs:
> >>
> >>
> >> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
> >> {error_report,<0.31.0>,
> >>                                   {<0.27554.2>,std_error,
> >>                                    [{application,mochiweb},
> >>                                     "Accept failed error",
> >>                                     "{error,emfile}"]}}
> >> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
> >> {error_report,<0.31.0>,
> >>                         {<0.27554.2>,crash_report,
> >>                          [[{initial_call,
> >>                                {mochiweb_acceptor,init,
> >>                                    ['Argument__1','Argument__2',
> >>                                     'Argument__3']}},
> >>                            {pid,<0.27554.2>},
> >>                            {registered_name,[]},
> >>                            {error_info,
> >>                                {exit,
> >>                                    {error,accept_failed},
> >>                                    [{mochiweb_acceptor,init,3},
> >>                                     {proc_lib,init_p_do_apply,3}]}},
> >>                            {ancestors,
> >>                                [couch_httpd,couch_secondary_services,
> >>                                 couch_server_sup,<0.32.0>]},
> >>                            {messages,[]},
> >>                            {links,[<0.129.0>]},
> >>                            {dictionary,[]},
> >>                            {trap_exit,false},
> >>                            {status,running},
> >>                            {heap_size,233},
> >>                            {stack_size,24},
> >>                            {reductions,244}],
> >>                           []]}}
> >>
> >>
> >> I think "emfile" means that CouchDB (or mochiweb?) couldn't open any
> >> more files / connections. I've set the (hard and soft) nofile limit for
> >> user couchdb to 4096, but didn't raise the ERL_MAX_PORTS accordingly.
> >> Anyway, as soon as the error occurred, CouchDB started writing most of my
> >> view files from scratch, rendering the instance unusable.
> >>
> >> I'd expect CouchDB to fail more gracefully when the maximum number of
> >> open files is reached. Is this a bug or expected behaviour?
> >>
> >
> > Looks like a bug. Whenever there's a problem opening a view file,
> > couch_view tries to delete it. Clearly, this is not the right course of
> > action when the problem is due to emfile.
>
> This looks rather serious. I opened a JIRA:
>
>  https://issues.apache.org/jira/browse/COUCHDB-1445
>
> And started collecting the info. Bob N's message came in in the meantime
> and I agree, we should see if there's more cases where we need to be
> careful.
>
> Also, I'd consider this blocking for 1.2.0.
>
> Anyone who can pitch in with their expertise is more than welcome! :)
>

Assigned to me. Patch forthcoming. Agree it should block 1.2.0, especially
because upgrades are the sort of things where bad packaging downstream
might cause custom ERL_MAX_PORTS settings to be overwritten and we wouldn't
want anyone's production to have its views erased needlessly.

-Randall


>
> Cheers
> Jan
> --
>
>
> >
> > Here's a patch that I propose might fix it. I'd like to hear from another
> > dev on this, or if there's a better way we should bail out.
> >
> > diff --git a/src/couchdb/couch_view_group.erl
> > b/src/couchdb/couch_view_group.erl
> > index 97fc512..ab075bd 100644
> > --- a/src/couchdb/couch_view_group.erl
> > +++ b/src/couchdb/couch_view_group.erl
> > @@ -469,6 +469,10 @@ open_index_file(RootDir, DbName, GroupSig) ->
> >     case couch_file:open(FileName) of
> >     {ok, Fd}        -> {ok, Fd};
> >     {error, enoent} -> couch_file:open(FileName, [create]);
> > +    {error, emfile} ->
> > +        ?LOG_ERROR("Could not open file for view index: max open files
> > reached. "
> > +                   "Raise ERL_MAX_PORTS or system limits.", []),
> > +        throw({error, emfile});
> >     Error           -> Error
> >     end.
>
>

Re: {error,emfile} on CouchDB 1.2.x

Posted by Jan Lehnardt <ja...@apache.org>.
On Mar 18, 2012, at 21:28 , Randall Leeds wrote:

> On Sun, Mar 18, 2012 at 11:08, Stefan Kögl <ko...@gmail.com> wrote:
> 
>> Hi,
>> 
>> Another thing I noticed during my tests of CouchDB 1.2.x. I redirected
>> live traffic to the instance and after a rather short time, requests
>> were failing with the following information in the logs:
>> 
>> 
>> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
>> {error_report,<0.31.0>,
>>                                   {<0.27554.2>,std_error,
>>                                    [{application,mochiweb},
>>                                     "Accept failed error",
>>                                     "{error,emfile}"]}}
>> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
>> {error_report,<0.31.0>,
>>                         {<0.27554.2>,crash_report,
>>                          [[{initial_call,
>>                                {mochiweb_acceptor,init,
>>                                    ['Argument__1','Argument__2',
>>                                     'Argument__3']}},
>>                            {pid,<0.27554.2>},
>>                            {registered_name,[]},
>>                            {error_info,
>>                                {exit,
>>                                    {error,accept_failed},
>>                                    [{mochiweb_acceptor,init,3},
>>                                     {proc_lib,init_p_do_apply,3}]}},
>>                            {ancestors,
>>                                [couch_httpd,couch_secondary_services,
>>                                 couch_server_sup,<0.32.0>]},
>>                            {messages,[]},
>>                            {links,[<0.129.0>]},
>>                            {dictionary,[]},
>>                            {trap_exit,false},
>>                            {status,running},
>>                            {heap_size,233},
>>                            {stack_size,24},
>>                            {reductions,244}],
>>                           []]}}
>> 
>> 
>> I think "emfile" means that CouchDB (or mochiweb?) couldn't open any
>> more files / connections. I've set the (hard and soft) nofile limit for
>> user couchdb to 4096, but didn't raise the ERL_MAX_PORTS accordingly.
>> Anyway, as soon as the error occurred, CouchDB started writing most of my
>> view files from scratch, rendering the instance unusable.
>> 
>> I'd expect CouchDB to fail more gracefully when the maximum number of
>> open files is reached. Is this a bug or expected behaviour?
>> 
> 
> Looks like a bug. Whenever there's a problem opening a view file,
> couch_view tries to delete it. Clearly, this is not the right course of
> action when the problem is due to emfile.

This looks rather serious. I opened a JIRA: 

  https://issues.apache.org/jira/browse/COUCHDB-1445

And started collecting the info. Bob N's message came in in the meantime
and I agree, we should see if there's more cases where we need to be careful.

Also, I'd consider this blocking for 1.2.0.

Anyone who can pitch in with their expertise is more than welcome! :)

Cheers
Jan
-- 


> 
> Here's a patch that I propose might fix it. I'd like to hear from another
> dev on this, or if there's a better way we should bail out.
> 
> diff --git a/src/couchdb/couch_view_group.erl
> b/src/couchdb/couch_view_group.erl
> index 97fc512..ab075bd 100644
> --- a/src/couchdb/couch_view_group.erl
> +++ b/src/couchdb/couch_view_group.erl
> @@ -469,6 +469,10 @@ open_index_file(RootDir, DbName, GroupSig) ->
>     case couch_file:open(FileName) of
>     {ok, Fd}        -> {ok, Fd};
>     {error, enoent} -> couch_file:open(FileName, [create]);
> +    {error, emfile} ->
> +        ?LOG_ERROR("Could not open file for view index: max open files
> reached. "
> +                   "Raise ERL_MAX_PORTS or system limits.", []),
> +        throw({error, emfile});
>     Error           -> Error
>     end.


Re: {error,emfile} on CouchDB 1.2.x

Posted by Randall Leeds <ra...@gmail.com>.
On Sun, Mar 18, 2012 at 11:08, Stefan Kögl <ko...@gmail.com> wrote:

> Hi,
>
> Another thing I noticed during my tests of CouchDB 1.2.x. I redirected
> live traffic to the instance and after a rather short time, requests
> were failing with the following information in the logs:
>
>
> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
> {error_report,<0.31.0>,
>                                    {<0.27554.2>,std_error,
>                                     [{application,mochiweb},
>                                      "Accept failed error",
>                                      "{error,emfile}"]}}
> [Sun, 18 Mar 2012 16:39:24 GMT] [error] [<0.27554.2>]
> {error_report,<0.31.0>,
>                          {<0.27554.2>,crash_report,
>                           [[{initial_call,
>                                 {mochiweb_acceptor,init,
>                                     ['Argument__1','Argument__2',
>                                      'Argument__3']}},
>                             {pid,<0.27554.2>},
>                             {registered_name,[]},
>                             {error_info,
>                                 {exit,
>                                     {error,accept_failed},
>                                     [{mochiweb_acceptor,init,3},
>                                      {proc_lib,init_p_do_apply,3}]}},
>                             {ancestors,
>                                 [couch_httpd,couch_secondary_services,
>                                  couch_server_sup,<0.32.0>]},
>                             {messages,[]},
>                             {links,[<0.129.0>]},
>                             {dictionary,[]},
>                             {trap_exit,false},
>                             {status,running},
>                             {heap_size,233},
>                             {stack_size,24},
>                             {reductions,244}],
>                            []]}}
>
>
> I think "emfile" means that CouchDB (or mochiweb?) couldn't open any
> more files / connections. I've set the (hard and soft) nofile limit for
> user couchdb to 4096, but didn't raise the ERL_MAX_PORTS accordingly.
> Anyway, as soon as the error occurred, CouchDB started writing most of my
> view files from scratch, rendering the instance unusable.
>
> I'd expect CouchDB to fail more gracefully when the maximum number of
> open files is reached. Is this a bug or expected behaviour?
>

Looks like a bug. Whenever there's a problem opening a view file,
couch_view tries to delete it. Clearly, this is not the right course of
action when the problem is due to emfile.

Here's a patch that I propose might fix it. I'd like to hear from another
dev on this, or if there's a better way we should bail out.

diff --git a/src/couchdb/couch_view_group.erl b/src/couchdb/couch_view_group.erl
index 97fc512..ab075bd 100644
--- a/src/couchdb/couch_view_group.erl
+++ b/src/couchdb/couch_view_group.erl
@@ -469,6 +469,10 @@ open_index_file(RootDir, DbName, GroupSig) ->
     case couch_file:open(FileName) of
     {ok, Fd}        -> {ok, Fd};
     {error, enoent} -> couch_file:open(FileName, [create]);
+    {error, emfile} ->
+        ?LOG_ERROR("Could not open file for view index: max open files reached. "
+                   "Raise ERL_MAX_PORTS or system limits.", []),
+        throw({error, emfile});
     Error           -> Error
     end.