You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafficserver.apache.org by "Sudheer Vinukonda (JIRA)" <ji...@apache.org> on 2015/07/14 20:11:04 UTC
[jira] [Commented] (TS-3767) More unbounded retries within
read-while-writer breaking loop detection.
[ https://issues.apache.org/jira/browse/TS-3767?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14626795#comment-14626795 ]
Sudheer Vinukonda commented on TS-3767:
---------------------------------------
Below is a proposed patch:
{code}
diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
index 330aedd..f0610e4 100644
--- a/iocore/cache/Cache.cc
+++ b/iocore/cache/Cache.cc
@@ -80,6 +80,7 @@ int cache_config_enable_checksum = 0;
int cache_config_alt_rewrite_max_size = 4096;
int cache_config_read_while_writer = 0;
int cache_config_mutex_retry_delay = 2;
+int cache_config_writer_retry_delay = 50;
int cache_config_read_while_writer_max_retries = 10;
#ifdef HTTP_CACHE
static int enable_cache_empty_http_doc = 0;
@@ -3191,6 +3192,9 @@ ink_cache_init(ModuleVersion v)
REC_EstablishStaticConfigInt32(cache_config_read_while_writer_max_retries, "proxy.config.cache.read_while_writer.max_retries");
Debug("cache_init", "proxy.config.cache.read_while_writer.max_retries = %d", cache_config_read_while_writer_max_retries);
+ REC_EstablishStaticConfigInt32(cache_config_writer_retry_delay, "proxy.config.cache.writer_retry_delay");
+ Debug("cache_init", "proxy.config.cache.writer_retry_delay = %dms", cache_config_writer_retry_delay);
+
REC_EstablishStaticConfigInt32(cache_config_hit_evacuate_percent, "proxy.config.cache.hit_evacuate_percent");
Debug("cache_init", "proxy.config.cache.hit_evacuate_percent = %d", cache_config_hit_evacuate_percent);
diff --git a/iocore/cache/CacheRead.cc b/iocore/cache/CacheRead.cc
index c617ee4..2cbb3fe 100644
--- a/iocore/cache/CacheRead.cc
+++ b/iocore/cache/CacheRead.cc
@@ -336,7 +336,11 @@ CacheVC::openReadFromWriter(int event, Event *e)
return openReadStartHead(event, e);
} else if (ret == EVENT_CONT) {
ink_assert(!write_vc);
- VC_SCHED_WRITER_RETRY();
+ if (writer_lock_retry < cache_config_read_while_writer_max_retries) {
+ VC_SCHED_WRITER_RETRY();
+ } else {
+ return openReadFromWriterFailure(CACHE_EVENT_OPEN_READ_FAILED, (Event *) - err);
+ }
} else
ink_assert(write_vc);
} else {
@@ -587,8 +591,13 @@ CacheVC::openReadReadDone(int event, Event *e)
DDebug("cache_read_agg", "%p: key: %X ReadRead writer aborted: %d", this, first_key.slice32(1), (int)vio.ndone);
goto Lerror;
}
- DDebug("cache_read_agg", "%p: key: %X ReadRead retrying: %d", this, first_key.slice32(1), (int)vio.ndone);
- VC_SCHED_WRITER_RETRY(); // wait for writer
+ if (writer_lock_retry < cache_config_read_while_writer_max_retries) {
+ DDebug("cache_read_agg", "%p: key: %X ReadRead retrying: %d", this, first_key.slice32(1), (int)vio.ndone);
+ VC_SCHED_WRITER_RETRY(); // wait for writer
+ } else {
+ DDebug("cache_read_agg", "%p: key: %X ReadRead retries exhausted, bailing..: %d", this, first_key.slice32(1), (int)vio.ndone);
+ goto Ldone;
+ }
}
// fall through for truncated documents
}
diff --git a/iocore/cache/P_CacheInternal.h b/iocore/cache/P_CacheInternal.h
index eb3a161..000e014 100644
--- a/iocore/cache/P_CacheInternal.h
+++ b/iocore/cache/P_CacheInternal.h
@@ -59,8 +59,6 @@ struct EvacuationBlock;
#endif
#define AIO_SOFT_FAILURE -100000
-// retry read from writer delay
-#define WRITER_RETRY_DELAY HRTIME_MSECONDS(50)
#ifndef CACHE_LOCK_FAIL_RATE
#define CACHE_TRY_LOCK(_l, _m, _t) MUTEX_TRY_LOCK(_l, _m, _t)
@@ -96,13 +94,9 @@ struct EvacuationBlock;
do { \
ink_assert(!trigger); \
writer_lock_retry++; \
- ink_hrtime _t = WRITER_RETRY_DELAY; \
+ ink_hrtime _t = HRTIME_MSECONDS(cache_config_writer_retry_delay); \
if (writer_lock_retry > 2) \
- _t = WRITER_RETRY_DELAY * 2; \
- else if (writer_lock_retry > 5) \
- _t = WRITER_RETRY_DELAY * 10; \
- else if (writer_lock_retry > 10) \
- _t = WRITER_RETRY_DELAY * 100; \
+ _t = HRTIME_MSECONDS(cache_config_writer_retry_delay) * 2; \
trigger = mutex->thread_holding->schedule_in_local(this, _t); \
return EVENT_CONT; \
} while (0)
@@ -221,6 +215,7 @@ extern int cache_config_hit_evacuate_size_limit;
extern int cache_config_force_sector_size;
extern int cache_config_target_fragment_size;
extern int cache_config_mutex_retry_delay;
+extern int cache_config_writer_retry_delay;
extern int cache_config_read_while_writer_max_retries;
// CacheVC
diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
index 4d847b0..49c2ce4 100644
--- a/mgmt/RecordsConfig.cc
+++ b/mgmt/RecordsConfig.cc
@@ -966,6 +966,8 @@ static const RecordElement RecordsConfig[] =
,
{RECT_CONFIG, "proxy.config.cache.read_while_writer.max_retries", RECD_INT, "10", RECU_DYNAMIC, RR_NULL, RECC_NULL, NULL, RECA_NULL}
,
+ {RECT_CONFIG, "proxy.config.cache.writer_retry_delay", RECD_INT, "50", RECU_DYNAMIC, RR_NULL, RECC_NULL, NULL, RECA_NULL}
+ ,
//##############################################################################
//#
{code}
> More unbounded retries within read-while-writer breaking loop detection.
> ------------------------------------------------------------------------
>
> Key: TS-3767
> URL: https://issues.apache.org/jira/browse/TS-3767
> Project: Traffic Server
> Issue Type: Bug
> Components: Cache
> Reporter: Sudheer Vinukonda
> Labels: A
> Fix For: 6.1.0
>
>
> [~zwoop] noticed the loop detection (request to self) fails with default settings, when read-while-writer functionality is enabled. Upon further investigation, this seems to be caused by more cases of unbounded retries within read-while-writer (similar to TS-3622), which prevent reaching the loop detection point (currently, done after the cache lookup state).
> TS-3622 added a bound for read-while-writer in the case, where the writer lock is available, but, the first fragment is not downloaded yet, but, there are more cases which cause similar issues.
> Discussing with [~zwoop], we think we should add bounds in all such cases with configurable timer (duration) and configurable max number of retries.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)