You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafficserver.apache.org by "Leif Hedstrom (JIRA)" <ji...@apache.org> on 2015/03/19 22:00:38 UTC

[jira] [Commented] (TS-3455) Marking the cache STALE in lookup-complete causes abort()

    [ https://issues.apache.org/jira/browse/TS-3455?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14370102#comment-14370102 ] 

Leif Hedstrom commented on TS-3455:
-----------------------------------

I suggested the following patch, which is dangerous at best since we're not sure why this is happening with this plugin. However, just looking at the code, this is what looks to make most sense.

{code}
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index 0078ef1..852f285 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
 //        ink_release_assert(s->current.request_to == PARENT_PROXY ||
 //                    s->http_config_param->no_dns_forward_to_parent != 0);
 
-        // Set ourselves up to handle pending revalidate issues
-        //  after the PP DNS lookup
-        ink_assert(s->pending_work == NULL);
-        s->pending_work = issue_revalidate;
-
         // We must be going a PARENT PROXY since so did
         //  origin server DNS lookup right after state Start
         //
@@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
         //  missing ip but we won't take down the system
         //
         if (s->current.request_to == PARENT_PROXY) {
+          // Set ourselves up to handle pending revalidate issues
+          //  after the PP DNS lookup
+          ink_assert(s->pending_work == NULL);
+          s->pending_work = issue_revalidate;
+
           TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
         } else if (s->current.request_to == ORIGIN_SERVER) {
           TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
{code}

> Marking the cache STALE in lookup-complete causes abort()
> ---------------------------------------------------------
>
>                 Key: TS-3455
>                 URL: https://issues.apache.org/jira/browse/TS-3455
>             Project: Traffic Server
>          Issue Type: Bug
>            Reporter: Luca Bruno
>             Fix For: 6.0.0
>
>
> I've written a simple test case plugin for demonstrating this problem, not sure if it's a problem on my side, but that would also mean that the regex invalidate plugin would also abort().
> What the plugin does: in LOOKUP_COMPLETE, if the cache status is FRESH then set it to STALE.
> To reproduce:
> 1) Send a first cacheable request to ATS, which gets cached.
> 2) Request again the same url, the plugin triggers and set the cache to STALE. Then ATS does abort().
> Plugin code:
> {noformat}
> #include <ts/ts.h>
> #include <ts/remap.h>
> #include <ts/experimental.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <getopt.h>
> #include <string.h>
> #include <string>
> #include <iterator>
> #include <map>
> const char PLUGIN_NAME[] = "maybebug";
> static int Handler(TSCont cont, TSEvent event, void *edata);
> struct PluginState {
>   PluginState()
>   {
>     cont = TSContCreate(Handler, NULL);
>     TSContDataSet(cont, this);
>   }
>   ~PluginState()
>   {
>     TSContDestroy(cont);
>   }
>   TSCont cont;
> };
> static int Handler(TSCont cont, TSEvent event, void* edata) {
>   TSHttpTxn txn = (TSHttpTxn)edata;
>   if (event == TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE) {
>     int lookup_status;
>     if (TS_SUCCESS == TSHttpTxnCacheLookupStatusGet(txn, &lookup_status)) {
>       TSDebug(PLUGIN_NAME, "lookup complete: %d", lookup_status);
>       if (lookup_status == TS_CACHE_LOOKUP_HIT_FRESH) {
>         TSDebug(PLUGIN_NAME, "set stale");
>         TSHttpTxnCacheLookupStatusSet(txn, TS_CACHE_LOOKUP_HIT_STALE);
>       }
>     }
>   }
>   TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
>   return TS_EVENT_NONE;
> }
> void TSPluginInit (int argc, const char *argv[]) {
>   TSPluginRegistrationInfo info;
>   info.plugin_name = strdup("cappello");
>   info.vendor_name = strdup("foo");
>   info.support_email = strdup("foo@bar.com");
>   if (TSPluginRegister(TS_SDK_VERSION_3_0 , &info) != TS_SUCCESS) {
>     TSError("Plugin registration failed");
>   }
>   PluginState* state = new PluginState();
>   TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, state->cont);
> }
> {noformat}
> Output:
> {noformat}
> [Mar 19 18:40:36.254] Server {0x7f6df0b4f740} DIAG: (maybebug) lookup complete: 0
> [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) lookup complete: 2
> [Mar 19 18:40:40.854] Server {0x7f6decfee700} DIAG: (maybebug) set stale
> FATAL: HttpTransact.cc:433: failed assert `s->pending_work == NULL`
> traffic_server - STACK TRACE: 
> /usr/local/lib/libtsutil.so.5(ink_fatal+0xa3)[0x7f6df072186d]
> /usr/local/lib/libtsutil.so.5(_Z12ink_get_randv+0x0)[0x7f6df071f3a0]
> traffic_server[0x60d0aa]
> traffic_server(_ZN12HttpTransact22HandleCacheOpenReadHitEPNS_5StateE+0xf82)[0x619206]
> ...
> {noformat}
> What happens in gdb is that HandleCacheOpenReadHit is called twice in the same request. The first time s->pending_work is NULL, the second time it's not NULL.
> The patch below fixes the problem:
> {noformat}
> diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
> index 0078ef1..852f285 100644
> --- a/proxy/http/HttpTransact.cc
> +++ b/proxy/http/HttpTransact.cc
> @@ -2641,11 +2641,6 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
>  //        ink_release_assert(s->current.request_to == PARENT_PROXY ||
>  //                    s->http_config_param->no_dns_forward_to_parent != 0);
>  
> -        // Set ourselves up to handle pending revalidate issues
> -        //  after the PP DNS lookup
> -        ink_assert(s->pending_work == NULL);
> -        s->pending_work = issue_revalidate;
> -
>          // We must be going a PARENT PROXY since so did
>          //  origin server DNS lookup right after state Start
>          //
> @@ -2654,6 +2649,11 @@ HttpTransact::HandleCacheOpenReadHit(State* s)
>          //  missing ip but we won't take down the system
>          //
>          if (s->current.request_to == PARENT_PROXY) {
> +          // Set ourselves up to handle pending revalidate issues
> +          //  after the PP DNS lookup
> +          ink_assert(s->pending_work == NULL);
> +          s->pending_work = issue_revalidate;
> +
>            TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, PPDNSLookup);
>          } else if (s->current.request_to == ORIGIN_SERVER) {
>            TRANSACT_RETURN(SM_ACTION_DNS_LOOKUP, OSDNSLookup);
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)