You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by am...@apache.org on 2014/01/22 01:37:06 UTC

git commit: TS-2505 Add --offline to traffic_line

Updated Branches:
  refs/heads/master 813d3e1a2 -> f8cb33617


TS-2505 Add --offline to traffic_line


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/f8cb3361
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/f8cb3361
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/f8cb3361

Branch: refs/heads/master
Commit: f8cb33617947d6ca27208a57cff7758fc7529391
Parents: 813d3e1
Author: Alan M. Carroll <am...@network-geographics.com>
Authored: Tue Jan 21 18:36:48 2014 -0600
Committer: Alan M. Carroll <am...@network-geographics.com>
Committed: Tue Jan 21 18:36:48 2014 -0600

----------------------------------------------------------------------
 CHANGES                                     |   2 +
 cmd/traffic_line/traffic_line.cc            |   5 +
 doc/admin/configuring-traffic-server.en.rst |  26 ++--
 doc/reference/commands/traffic_line.en.rst  |  13 +-
 iocore/cache/Cache.cc                       | 153 ++++++++++++++---------
 iocore/cache/CacheTest.cc                   |  72 +++++++++++
 iocore/cache/I_Cache.h                      |  22 ++++
 iocore/cache/P_CacheDisk.h                  |   2 +-
 lib/records/I_RecEvents.h                   |   3 +
 lib/ts/ink_args.cc                          |   9 +-
 lib/ts/ink_args.h                           |   2 +-
 mgmt/BaseManager.h                          |   7 +-
 mgmt/LocalManager.cc                        |   2 +-
 mgmt/ProcessManager.cc                      |   5 +-
 mgmt/api/CoreAPI.cc                         |  14 +++
 mgmt/api/CoreAPI.h                          |   1 +
 mgmt/api/CoreAPIRemote.cc                   |  12 ++
 mgmt/api/INKMgmtAPI.cc                      |   6 +
 mgmt/api/NetworkUtilsDefs.h                 |   1 +
 mgmt/api/TSControlMain.cc                   |  34 +++++
 mgmt/api/TSControlMain.h                    |   1 +
 mgmt/api/include/mgmtapi.h                  |   6 +
 proxy/Main.cc                               |  25 ++++
 23 files changed, 339 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index c73e4c3..0140b40 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,8 @@
                                                          -*- coding: utf-8 -*-
 Changes with Apache Traffic Server 4.2.0
 
+  *) [TS-2505] Add traffic_line --offline option.
+
   *) [TS-2305] Fall back to ftruncate if posix_fallocate fails.
 
   *) [TS-2504] Support OpenSSL installations that use the lib64 directory.

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/cmd/traffic_line/traffic_line.cc
----------------------------------------------------------------------
diff --git a/cmd/traffic_line/traffic_line.cc b/cmd/traffic_line/traffic_line.cc
index 7f99f5b..f25c95d 100644
--- a/cmd/traffic_line/traffic_line.cc
+++ b/cmd/traffic_line/traffic_line.cc
@@ -46,6 +46,7 @@ static int ClearCluster;
 static int ClearNode;
 static char ZeroCluster[1024];
 static char ZeroNode[1024];
+static char StorageCmdOffline[1024];
 static int VersionFlag;
 
 static TSError
@@ -84,6 +85,8 @@ handleArgInvocation()
     fprintf(stderr, "Query Deadhosts is not implemented, it requires support for congestion control.\n");
     fprintf(stderr, "For more details, examine the old code in cli/CLI.cc: QueryDeadhosts()\n");
     return TS_ERR_FAIL;
+  } else if (*StorageCmdOffline) {
+    return TSStorageDeviceCmdOffline(StorageCmdOffline);
   } else if (*ReadVar != '\0') {        // Handle a value read
     if (*SetVar != '\0' || *VarValue != '\0') {
       fprintf(stderr, "%s: Invalid Argument Combination: Can not read and set values at the same time\n", programName);
@@ -162,6 +165,7 @@ main(int /* argc ATS_UNUSED */, char **argv)
   ZeroCluster[0] = '\0';
   ZeroNode[0] = '\0';
   VersionFlag = 0;
+  *StorageCmdOffline = 0;
 
   // build the application information structure
   appVersionInfo.setup(PACKAGE_NAME,"traffic_line", PACKAGE_VERSION, __DATE__, __TIME__, BUILD_MACHINE, BUILD_PERSON, "");
@@ -185,6 +189,7 @@ main(int /* argc ATS_UNUSED */, char **argv)
     {"clear_node", 'c', "Clear Statistics (local node)", "F", &ClearNode, NULL, NULL},
     {"zero_cluster", 'Z', "Zero Specific Statistic (cluster wide)", "S1024", &ZeroCluster, NULL, NULL},
     {"zero_node", 'z', "Zero Specific Statistic (local node)", "S1024", &ZeroNode, NULL, NULL},
+    {"offline", '-', "Mark cache storage offline", "S1024", &StorageCmdOffline, NULL, NULL},
     {"version", 'V', "Print Version Id", "T", &VersionFlag, NULL, NULL},
   };
 

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/doc/admin/configuring-traffic-server.en.rst
----------------------------------------------------------------------
diff --git a/doc/admin/configuring-traffic-server.en.rst b/doc/admin/configuring-traffic-server.en.rst
index ca33e86..a06eaea 100644
--- a/doc/admin/configuring-traffic-server.en.rst
+++ b/doc/admin/configuring-traffic-server.en.rst
@@ -6,20 +6,20 @@ Configuring Traffic Server
 
 .. Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
- 
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
    http://www.apache.org/licenses/LICENSE-2.0
- 
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
 
 Traffic Server provides several options for configuring the system.
 

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/doc/reference/commands/traffic_line.en.rst
----------------------------------------------------------------------
diff --git a/doc/reference/commands/traffic_line.en.rst b/doc/reference/commands/traffic_line.en.rst
index 08683fb..213cd40 100644
--- a/doc/reference/commands/traffic_line.en.rst
+++ b/doc/reference/commands/traffic_line.en.rst
@@ -5,9 +5,9 @@
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
- 
+
    http://www.apache.org/licenses/LICENSE-2.0
- 
+
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -113,6 +113,13 @@ Options
 
     Reset performance statistics to zero on the local node.
 
+.. option:: --offline PATH
+
+   Mark a cache storage device as offline. The storage is identified by a *path* which must exactly match a path
+   specified in :file:`storage.config`. This removes the storage from the cache and redirects requests that would have
+   used this storage to other storage. This has exactly the same effect as a disk failure for that storage. This does
+   not persist across restarts of the :program:`traffic_server` process.
+
 .. traffic-line-performance-statistics
 
 Performance Statistics
@@ -774,7 +781,7 @@ The :option:`traffic_line -r` option accepts the following variable names::
 Examples
 ========
 
-Configure Traffic Server to log in Squid format:: 
+Configure Traffic Server to log in Squid format::
 
     $ traffic_line -s proxy.config.log.squid_log_enabled -v 1
     $ traffic_line -s proxy.config.log.squid_log_is_ascii -v 1

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/iocore/cache/Cache.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
index 16dbc37..a8013c0 100644
--- a/iocore/cache/Cache.cc
+++ b/iocore/cache/Cache.cc
@@ -2039,6 +2039,7 @@ build_vol_hash_table(CacheHostRecord *cp)
   unsigned int *gotvol = (unsigned int *) ats_malloc(sizeof(unsigned int) * num_vols);
   unsigned int *rnd = (unsigned int *) ats_malloc(sizeof(unsigned int) * num_vols);
   unsigned short *ttable = (unsigned short *)ats_malloc(sizeof(unsigned short) * VOL_HASH_TABLE_SIZE);
+  unsigned short *old_table;
   unsigned int *rtable_entries = (unsigned int *) ats_malloc(sizeof(unsigned int) * num_vols);
   unsigned int rtable_size = 0;
 
@@ -2088,8 +2089,8 @@ build_vol_hash_table(CacheHostRecord *cp)
     Debug("cache_init", "build_vol_hash_table index %d mapped to %d requested %d got %d", i, mapping[i], forvol[i], gotvol[i]);
   }
   // install new table
-  if (cp->vol_hash_table)
-    new_Freer(cp->vol_hash_table, CACHE_MEM_FREE_TIMEOUT);
+  if (0 != (old_table = ink_atomic_swap(&(cp->vol_hash_table), ttable)))
+    new_Freer(old_table, CACHE_MEM_FREE_TIMEOUT);
   ats_free(mapping);
   ats_free(p);
   ats_free(forvol);
@@ -2097,7 +2098,6 @@ build_vol_hash_table(CacheHostRecord *cp)
   ats_free(rnd);
   ats_free(rtable_entries);
   ats_free(rtable);
-  cp->vol_hash_table = ttable;
 }
 
 void
@@ -2108,29 +2108,94 @@ Cache::vol_initialized(bool result) {
     open_done();
 }
 
+/** Take a disk out of service programmatically: mark it bad, subtract its space from the cache stats, and rebuild the host tables.
+*/
+bool
+CacheProcessor::mark_storage_offline( CacheDisk* d ///< Target disk
+  ) {
+  bool zret; // indicates whether there's any online storage left.
+  int p;
+  uint64_t total_bytes_delete = 0;
+  uint64_t total_dir_delete = 0;
+  uint64_t used_dir_delete = 0;
+
+  if (!DISK_BAD(d)) SET_DISK_BAD(d);
+
+  for (p = 0; p < gnvol; p++) {
+    if (d->fd == gvol[p]->fd) {
+      total_dir_delete += gvol[p]->buckets * gvol[p]->segments * DIR_DEPTH;
+      used_dir_delete += dir_entries_used(gvol[p]);
+      total_bytes_delete += gvol[p]->len - vol_dirlen(gvol[p]);
+    }
+  }
+
+  RecIncrGlobalRawStat(cache_rsb, cache_bytes_total_stat, -total_bytes_delete);
+  RecIncrGlobalRawStat(cache_rsb, cache_direntries_total_stat, -total_dir_delete);
+  RecIncrGlobalRawStat(cache_rsb, cache_direntries_used_stat, -used_dir_delete);
+
+  if (theCache) {
+    rebuild_host_table(theCache);
+  }
+  if (theStreamCache) {
+    rebuild_host_table(theStreamCache);
+  }
+
+  zret = this->has_online_storage();
+  if (!zret) {
+    Warning("All storage devices offline, cache disabled");
+    CacheProcessor::cache_ready = 0;
+  } else { // check cache types specifically
+    if (theCache && !theCache->hosttable->gen_host_rec.vol_hash_table) {
+      unsigned int caches_ready = 0;
+      caches_ready = caches_ready | (1 << CACHE_FRAG_TYPE_HTTP);
+      caches_ready = caches_ready | (1 << CACHE_FRAG_TYPE_NONE);
+      caches_ready = ~caches_ready;
+      CacheProcessor::cache_ready &= caches_ready;
+      Warning("all volumes for http cache are corrupt, http cache disabled");
+    }
+    if (theStreamCache && !theStreamCache->hosttable->gen_host_rec.vol_hash_table) {
+      unsigned int caches_ready = 0;
+      caches_ready = caches_ready | (1 << CACHE_FRAG_TYPE_RTSP);
+      caches_ready = ~caches_ready;
+      CacheProcessor::cache_ready &= caches_ready;
+      Warning("all volumes for mixt cache are corrupt, mixt cache disabled");
+    }
+  }
+
+  return zret;
+}
+
+bool
+CacheProcessor::has_online_storage() const {
+  CacheDisk** dptr = gdisks;
+  for (int disk_no = 0 ; disk_no < gndisks ; ++disk_no, ++dptr) {
+    if (!DISK_BAD(*dptr)) return true;
+  }
+  return false;
+}
+
 int
 AIO_Callback_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) {
   /* search for the matching file descriptor */
   if (!CacheProcessor::cache_ready)
     return EVENT_DONE;
   int disk_no = 0;
-  int good_disks = 0;
   AIOCallback *cb = (AIOCallback *) data;
 #if TS_USE_INTERIM_CACHE == 1
   for (; disk_no < gn_interim_disks; disk_no++) {
     CacheDisk *d = g_interim_disks[disk_no];
 
     if (d->fd == cb->aiocb.aio_fildes) {
+      char message[256];
+
       d->num_errors++;
       if (!DISK_BAD(d)) {
-        char message[128];
-        snprintf(message, sizeof(message), "Error accessing Disk %s", d->path);
+        snprintf(message, sizeof(message), "Error accessing Disk %s [%d/%d]", d->path, d->num_errors, cache_config_max_disk_errors);
         Warning("%s", message);
         REC_SignalManager(REC_SIGNAL_CACHE_WARNING, message);
       } else if (!DISK_BAD_SIGNALLED(d)) {
-        char message[128];
         snprintf(message, sizeof(message),
-            "too many errors accessing disk %s: declaring disk bad", d->path);
+                 "too many errors [%d] accessing disk %s: declaring disk bad", d->num_errors, d->path);
         Warning("%s", message);
         REC_SignalManager(REC_SIGNAL_CACHE_ERROR, message);
         good_interim_disks--;
@@ -2138,77 +2203,28 @@ AIO_Callback_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data
     }
   }
 #endif
+
   for (; disk_no < gndisks; disk_no++) {
     CacheDisk *d = gdisks[disk_no];
 
     if (d->fd == cb->aiocb.aio_fildes) {
+      char message[256];
       d->num_errors++;
 
       if (!DISK_BAD(d)) {
-        char message[128];
         snprintf(message, sizeof(message), "Error accessing Disk %s [%d/%d]", d->path, d->num_errors, cache_config_max_disk_errors);
         Warning("%s", message);
         REC_SignalManager(REC_SIGNAL_CACHE_WARNING, message);
       } else if (!DISK_BAD_SIGNALLED(d)) {
-
-        char message[128];
         snprintf(message, sizeof(message), "too many errors accessing disk %s [%d/%d]: declaring disk bad", d->path, d->num_errors, cache_config_max_disk_errors);
         Warning("%s", message);
         REC_SignalManager(REC_SIGNAL_CACHE_ERROR, message);
-        /* subtract the disk space that was being used from  the cache size stat */
-        // dir entries stat
-        int p;
-        uint64_t total_bytes_delete = 0;
-        uint64_t total_dir_delete = 0;
-        uint64_t used_dir_delete = 0;
-
-        for (p = 0; p < gnvol; p++) {
-          if (d->fd == gvol[p]->fd) {
-            total_dir_delete += gvol[p]->buckets * gvol[p]->segments * DIR_DEPTH;
-            used_dir_delete += dir_entries_used(gvol[p]);
-            total_bytes_delete += gvol[p]->len - vol_dirlen(gvol[p]);
-          }
-        }
-
-        RecIncrGlobalRawStat(cache_rsb, cache_bytes_total_stat, -total_bytes_delete);
-        RecIncrGlobalRawStat(cache_rsb, cache_direntries_total_stat, -total_dir_delete);
-        RecIncrGlobalRawStat(cache_rsb, cache_direntries_used_stat, -used_dir_delete);
-
-        if (theCache) {
-          rebuild_host_table(theCache);
-        }
-        if (theStreamCache) {
-          rebuild_host_table(theStreamCache);
-        }
+        cacheProcessor.mark_storage_offline(d); // take it out of service
       }
-      if (good_disks)
-        return EVENT_DONE;
+      break;
     }
-    if (!DISK_BAD(d))
-      good_disks++;
-  }
-  if (!good_disks) {
-    Warning("all disks are bad, cache disabled");
-    CacheProcessor::cache_ready = 0;
-    delete cb;
-    return EVENT_DONE;
   }
 
-  if (theCache && !theCache->hosttable->gen_host_rec.vol_hash_table) {
-    unsigned int caches_ready = 0;
-    caches_ready = caches_ready | (1 << CACHE_FRAG_TYPE_HTTP);
-    caches_ready = caches_ready | (1 << CACHE_FRAG_TYPE_NONE);
-    caches_ready = ~caches_ready;
-    CacheProcessor::cache_ready &= caches_ready;
-    Warning("all volumes for http cache are corrupt, http cache disabled");
-  }
-  if (theStreamCache && !theStreamCache->hosttable->gen_host_rec.vol_hash_table) {
-    unsigned int caches_ready = 0;
-    caches_ready = caches_ready | (1 << CACHE_FRAG_TYPE_RTSP);
-    caches_ready = ~caches_ready;
-    CacheProcessor::cache_ready &= caches_ready;
-    Warning("all volumes for mixt cache are corrupt, mixt cache disabled");
-  }
   delete cb;
   return EVENT_DONE;
 }
@@ -3475,3 +3491,18 @@ CacheProcessor::remove(Continuation *cont, URL *url, bool cluster_cache_local, C
   return caches[frag_type]->remove(cont, &md5, frag_type, true, false, const_cast<char*>(hostname), len);
 }
 
+CacheDisk*
+CacheProcessor::find_by_path(char const* path, int len)
+{
+  if (CACHE_INITIALIZED == initialized) {
+    // If no length is passed in, assume it's null terminated.
+    if (0 >= len && 0 != *path) len = strlen(path);
+
+    for ( int i = 0 ; i < gndisks ; ++i ) {
+      if (0 == strncmp(path, gdisks[i]->path, len))
+        return gdisks[i];
+    }
+  }
+
+  return 0;
+}

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/iocore/cache/CacheTest.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/CacheTest.cc b/iocore/cache/CacheTest.cc
index ecc1a72..3c601ca 100644
--- a/iocore/cache/CacheTest.cc
+++ b/iocore/cache/CacheTest.cc
@@ -415,3 +415,75 @@ EXCLUSIVE_REGRESSION_TEST(cache)(RegressionTest *t, int /* atype ATS_UNUSED */,
 
 void force_link_CacheTest() {
 }
+
+// run -R 3 -r cache_disk_replacement_stability
+
+REGRESSION_TEST(cache_disk_replacement_stability)(RegressionTest *t, int level, int *pstatus) {
+  static int const MAX_VOLS = 26; // maximum values used in any test.
+  static uint64_t DEFAULT_SKIP = 8192;
+  static uint64_t DEFAULT_STRIPE_SIZE = 1024ULL * 1024 * 1024 * 911; // 911G
+  CacheDisk disk; // Only need one because it's just checked for failure.
+  CacheHostRecord hr1, hr2;
+  Vol* sample;
+  static int const sample_idx = 16;
+  Vol vols[MAX_VOLS];
+  Vol* vol_ptrs[MAX_VOLS]; // array of pointers.
+  char buff[2048];
+
+  // Only run at the highest levels.
+  if (REGRESSION_TEST_EXTENDED > level) {
+    *pstatus = REGRESSION_TEST_PASSED;
+    return;
+  }
+
+  *pstatus = REGRESSION_TEST_INPROGRESS;
+
+  disk.num_errors = 0;
+
+  for ( int i = 0 ; i < MAX_VOLS ; ++i ) {
+    vol_ptrs[i] = vols + i;
+    vols[i].disk = &disk;
+    vols[i].len = DEFAULT_STRIPE_SIZE;
+    snprintf(buff, sizeof(buff), "/dev/sd%c %" PRIu64 ":%" PRIu64,
+             'a' + i, DEFAULT_SKIP, vols[i].len);
+    vols[i].hash_id_md5.encodeBuffer(buff, strlen(buff));
+  }
+
+  hr1.vol_hash_table = 0;
+  hr1.vols = vol_ptrs;
+  hr1.num_vols = MAX_VOLS;
+  build_vol_hash_table(&hr1);
+
+  hr2.vol_hash_table = 0;
+  hr2.vols = vol_ptrs;
+  hr2.num_vols = MAX_VOLS;
+
+  sample = vols + sample_idx;
+  sample->len = 1024ULL * 1024 * 1024 * (1024+128); // 1.1 TB
+  snprintf(buff, sizeof(buff), "/dev/sd%c %" PRIu64 ":%" PRIu64,
+           'a' + sample_idx, DEFAULT_SKIP, sample->len);
+  sample->hash_id_md5.encodeBuffer(buff, strlen(buff));
+  build_vol_hash_table(&hr2);
+
+  // See what the difference is
+  int to = 0, from = 0;
+  int then = 0, now = 0;
+  for ( int i = 0 ; i < VOL_HASH_TABLE_SIZE ; ++i ) {
+    if (hr1.vol_hash_table[i] == sample_idx) ++then;
+    if (hr2.vol_hash_table[i] == sample_idx) ++now;
+    if (hr1.vol_hash_table[i] != hr2.vol_hash_table[i]) {
+      if (hr1.vol_hash_table[i] == sample_idx)
+        ++from;
+      else
+        ++to;
+    }
+  }
+  rprintf(t, "Cache stability difference - "
+          "delta = %d of %d : %d to, %d from, originally %d slots, now %d slots (net gain = %d/%d)\n"
+          , to+from, VOL_HASH_TABLE_SIZE, to, from, then, now, now-then, to-from
+    );
+  *pstatus = REGRESSION_TEST_PASSED;
+
+  hr1.vols = 0;
+  hr2.vols = 0;
+}

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/iocore/cache/I_Cache.h
----------------------------------------------------------------------
diff --git a/iocore/cache/I_Cache.h b/iocore/cache/I_Cache.h
index b89357f..5a84f0f 100644
--- a/iocore/cache/I_Cache.h
+++ b/iocore/cache/I_Cache.h
@@ -52,6 +52,7 @@
 #define CACHE_COMPRESSION_LIBLZMA        3
 
 struct CacheVC;
+struct CacheDisk;
 #ifdef HTTP_CACHE
 class CacheLookupHttpConfig;
 class URL;
@@ -132,6 +133,27 @@ struct CacheProcessor:public Processor
 
   Action *deref(Continuation *cont, CacheKey *key, bool cluster_cache_local,
                 CacheFragType frag_type = CACHE_FRAG_TYPE_HTTP, char *hostname = 0, int host_len = 0);
+
+  /** Mark physical disk/device/file as offline.
+      All stripes for this device are disabled.
+
+      @return @c true if there are any storage devices remaining online, @c false if not.
+
+      @note This is what is called if a disk is disabled due to I/O errors.
+  */
+  bool mark_storage_offline(CacheDisk* d);
+
+  /** Find the storage for a @a path.
+      If @a len is 0 then @a path is presumed null terminated.
+      @return @c NULL if the path does not match any defined storage.
+   */
+  CacheDisk* find_by_path(char const* path, int len = 0);
+
+  /** Check if there are any online storage devices.
+      If this returns @c false then the cache should be disabled as there is no storage available.
+  */
+  bool has_online_storage() const;
+
   static int IsCacheEnabled();
 
   static bool IsCacheReady(CacheFragType type);

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/iocore/cache/P_CacheDisk.h
----------------------------------------------------------------------
diff --git a/iocore/cache/P_CacheDisk.h b/iocore/cache/P_CacheDisk.h
index e5133d5..c704bdc 100644
--- a/iocore/cache/P_CacheDisk.h
+++ b/iocore/cache/P_CacheDisk.h
@@ -28,7 +28,7 @@
 
 extern int cache_config_max_disk_errors;
 
-#define DISK_BAD(_x)                    (_x->num_errors >= cache_config_max_disk_errors)
+#define DISK_BAD(_x)                    ((_x)->num_errors >= cache_config_max_disk_errors)
 #define DISK_BAD_SIGNALLED(_x)          (_x->num_errors > cache_config_max_disk_errors)
 #define SET_DISK_BAD(_x)                (_x->num_errors = cache_config_max_disk_errors)
 #define SET_DISK_OKAY(_x)               (_x->num_errors = 0)

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/lib/records/I_RecEvents.h
----------------------------------------------------------------------
diff --git a/lib/records/I_RecEvents.h b/lib/records/I_RecEvents.h
index bb7f580..7454b48 100644
--- a/lib/records/I_RecEvents.h
+++ b/lib/records/I_RecEvents.h
@@ -35,5 +35,8 @@
 #define REC_EVENT_HTTP_CLUSTER_DELTA    10007
 #define REC_EVENT_ROLL_LOG_FILES        10008
 #define REC_EVENT_LIBRECORDS            10009
+#define REC_EVENT_CONFIG_FILE_UPDATE_NO_INC_VERSION   10010
+
+#define REC_EVENT_CACHE_DISK_CONTROL     10011
 
 #endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/lib/ts/ink_args.cc
----------------------------------------------------------------------
diff --git a/lib/ts/ink_args.cc b/lib/ts/ink_args.cc
index ba537b9..59722ef 100644
--- a/lib/ts/ink_args.cc
+++ b/lib/ts/ink_args.cc
@@ -213,8 +213,13 @@ usage(const ArgumentDescription * argument_descriptions, unsigned n_argument_des
   for (unsigned i = 0; i < n_argument_descriptions; i++) {
     if (!argument_descriptions[i].description)
       continue;
-    fprintf(stderr, "  -%c, --%-17s %s",
-            argument_descriptions[i].key,
+
+    fprintf(stderr, "  ");
+
+    if ('-' == argument_descriptions[i].key) fprintf(stderr, "   ");
+    else fprintf(stderr, "-%c,", argument_descriptions[i].key);
+                                               
+    fprintf(stderr, " --%-17s %s",
             argument_descriptions[i].name,
             argument_types_descriptions[argument_descriptions[i].type ?
                                         strchr(argument_types_keys,

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/lib/ts/ink_args.h
----------------------------------------------------------------------
diff --git a/lib/ts/ink_args.h b/lib/ts/ink_args.h
index c32aa1a..74d1081 100644
--- a/lib/ts/ink_args.h
+++ b/lib/ts/ink_args.h
@@ -40,7 +40,7 @@ typedef void ArgumentFunction(const ArgumentDescription * argument_descriptions,
 struct ArgumentDescription
 {
   const char *name;
-  char key;
+  char key; // set to '-' if no single character key.
   /*
      "I" = integer
      "L" = int64_t

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/BaseManager.h
----------------------------------------------------------------------
diff --git a/mgmt/BaseManager.h b/mgmt/BaseManager.h
index 5063a93..a651e28 100644
--- a/mgmt/BaseManager.h
+++ b/mgmt/BaseManager.h
@@ -64,10 +64,15 @@
 #define MGMT_EVENT_ROLL_LOG_FILES        10008
 #define MGMT_EVENT_LIBRECORDS            10009
 #define MGMT_EVENT_CONFIG_FILE_UPDATE_NO_INC_VERSION   10010
+// cache storage operations - each is a distinct event.
+// this is done because the code paths share nothing but boilerplate logic
+// so it's easier to do this than to try to encode an opcode and yet another
+// case statement.
+#define MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE     10011
 
 /***********************************************************************
  *
- * MODULAIZATTION: if you are adding new signals, please ensure to add
+ * MODULARIZATION: if you are adding new signals, please ensure to add
  *                 the corresponding signals in librecords/I_RecSignals.h
  *
  *

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/LocalManager.cc
----------------------------------------------------------------------
diff --git a/mgmt/LocalManager.cc b/mgmt/LocalManager.cc
index dc30dcd..898e87c 100644
--- a/mgmt/LocalManager.cc
+++ b/mgmt/LocalManager.cc
@@ -909,7 +909,7 @@ LocalManager::processEventQueue()
         ink_assert(enqueue(mgmt_event_queue, mh));
         return;
       }
-      Debug("lm", "[TrafficManager] ==> Sending signal event '%d'\n", mh->msg_id);
+      Debug("lm", "[TrafficManager] ==> Sending signal event '%d' payload=%d\n", mh->msg_id, mh->data_len);
       lmgmt->sendMgmtMsgToProcesses(mh);
     }
     ats_free(mh);

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/ProcessManager.cc
----------------------------------------------------------------------
diff --git a/mgmt/ProcessManager.cc b/mgmt/ProcessManager.cc
index 35bbae6..0328893 100644
--- a/mgmt/ProcessManager.cc
+++ b/mgmt/ProcessManager.cc
@@ -125,7 +125,7 @@ ProcessManager::processEventQueue()
   while (!queue_is_empty(mgmt_event_queue)) {
     MgmtMessageHdr *mh = (MgmtMessageHdr *) dequeue(mgmt_event_queue);
 
-    Debug("pmgmt", "[ProcessManager] ==> Processing event id '%d'\n", mh->msg_id);
+    Debug("pmgmt", "[ProcessManager] ==> Processing event id '%d' payload=%d\n", mh->msg_id, mh->data_len);
     if (mh->data_len > 0) {
       executeMgmtCallback(mh->msg_id, (char *) mh + sizeof(MgmtMessageHdr), mh->data_len);
     } else {
@@ -333,6 +333,9 @@ ProcessManager::handleMgmtMsgFromLM(MgmtMessageHdr * mh)
   case MGMT_EVENT_LIBRECORDS:
     signalMgmtEntity(MGMT_EVENT_LIBRECORDS, data_raw, mh->data_len);
     break;
+  case MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE:
+    signalMgmtEntity(MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE, data_raw, mh->data_len);
+    break;
   default:
     mgmt_elog(stderr, 0, "[ProcessManager::pollLMConnection] unknown type %d\n", mh->msg_id);
     break;

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/CoreAPI.cc
----------------------------------------------------------------------
diff --git a/mgmt/api/CoreAPI.cc b/mgmt/api/CoreAPI.cc
index 370d6a7..b99d616 100644
--- a/mgmt/api/CoreAPI.cc
+++ b/mgmt/api/CoreAPI.cc
@@ -291,6 +291,20 @@ Bounce(bool cluster)
   return TS_ERR_OKAY;
 }
 
+/*-------------------------------------------------------------------------
+ * StorageDeviceCmdOffline
+ *-------------------------------------------------------------------------
+ * Disable a storage device.
+ * [amc] I don't think this is called but is required because of the way the
+ * CoreAPI is linked (it must match the remote CoreAPI signature so compiling
+ * this source or CoreAPIRemote.cc yields the same set of symbols).
+ */
+TSError
+StorageDeviceCmdOffline(char const* dev)
+{
+  lmgmt->signalEvent(MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE, dev);
+  return TS_ERR_OKAY;
+}
 /**************************************************************************
  * RECORD OPERATIONS
  *************************************************************************/

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/CoreAPI.h
----------------------------------------------------------------------
diff --git a/mgmt/api/CoreAPI.h b/mgmt/api/CoreAPI.h
index dfd2cc1..87dd84e 100644
--- a/mgmt/api/CoreAPI.h
+++ b/mgmt/api/CoreAPI.h
@@ -55,6 +55,7 @@ TSError Reconfigure();         // TS reread config files
 TSError Restart(bool cluster); //restart TM
 TSError HardRestart();         //restart traffic_cop
 TSError Bounce(bool cluster);  //restart traffic_server
+TSError StorageDeviceCmdOffline(char const* dev); // Storage device operation.
 
 /***************************************************************************
  * Record Operations

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/CoreAPIRemote.cc
----------------------------------------------------------------------
diff --git a/mgmt/api/CoreAPIRemote.cc b/mgmt/api/CoreAPIRemote.cc
index 6c0ecc3..8ccdac0 100644
--- a/mgmt/api/CoreAPIRemote.cc
+++ b/mgmt/api/CoreAPIRemote.cc
@@ -464,6 +464,18 @@ Bounce(bool cluster)
 }
 
 
+/*-------------------------------------------------------------------------
+ * StorageDeviceCmdOffline
+ *-------------------------------------------------------------------------
+ * Disable a storage device.
+ */
+TSError
+StorageDeviceCmdOffline(char const* dev)
+{
+  TSError ret;
+  ret = send_request_name(main_socket_fd, STORAGE_DEVICE_CMD_OFFLINE, dev);
+  return TS_ERR_OKAY != ret ? ret : parse_reply(main_socket_fd);
+}
 /***************************************************************************
  * Record Operations
  ***************************************************************************/

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/INKMgmtAPI.cc
----------------------------------------------------------------------
diff --git a/mgmt/api/INKMgmtAPI.cc b/mgmt/api/INKMgmtAPI.cc
index ac9a139..709debc 100644
--- a/mgmt/api/INKMgmtAPI.cc
+++ b/mgmt/api/INKMgmtAPI.cc
@@ -1819,6 +1819,12 @@ TSBounce(bool cluster)
   return Bounce(cluster);
 }
 
+tsapi TSError
+TSStorageDeviceCmdOffline(char const* dev)
+{
+  return StorageDeviceCmdOffline(dev);
+}
+
 
 /*--- diags output operations ---------------------------------------------*/
 tsapi void

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/NetworkUtilsDefs.h
----------------------------------------------------------------------
diff --git a/mgmt/api/NetworkUtilsDefs.h b/mgmt/api/NetworkUtilsDefs.h
index 987874a..6f0127b 100644
--- a/mgmt/api/NetworkUtilsDefs.h
+++ b/mgmt/api/NetworkUtilsDefs.h
@@ -79,6 +79,7 @@ typedef enum
   DIAGS,
   STATS_RESET_NODE,
   STATS_RESET_CLUSTER,
+  STORAGE_DEVICE_CMD_OFFLINE,
   UNDEFINED_OP /* This must be last */
 } OpType;
 

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/TSControlMain.cc
----------------------------------------------------------------------
diff --git a/mgmt/api/TSControlMain.cc b/mgmt/api/TSControlMain.cc
index d96c6a9..c507f5b 100644
--- a/mgmt/api/TSControlMain.cc
+++ b/mgmt/api/TSControlMain.cc
@@ -317,6 +317,17 @@ ts_ctrl_main(void *arg)
               }
               break;
 
+            case STORAGE_DEVICE_CMD_OFFLINE:
+              ret = handle_storage_device_cmd_offline(client_entry->sock_info, req);
+              ats_free(req);     // free the request allocated by preprocess_msg
+              if (ret == TS_ERR_NET_WRITE || ret == TS_ERR_NET_EOF) {
+                Debug("ts_main", "[ts_ctrl_main] ERROR:handle_storage_device_cmd_offline\n");
+                remove_client(client_entry, accepted_con);
+                con_entry = ink_hash_table_iterator_next(accepted_con, &con_state);
+                continue;
+              }
+              break;
+
             case EVENT_RESOLVE:
               ret = handle_event_resolve(client_entry->sock_info, req);
               ats_free(req);     // free the request allocated by preprocess_msg
@@ -732,6 +743,29 @@ handle_restart(struct SocketInfo sock_info, char *req, bool bounce)
 }
 
 /**************************************************************************
+ * handle_storage_device_cmd_offline
+ *
+ * purpose: handle a request to take a storage device offline.
+ * input: sock_info - the socket to use to talk to client; req - request payload (device name) forwarded to the server
+ * output: TS_ERR_xx
+ * note: None
+ *************************************************************************/
+TSError
+handle_storage_device_cmd_offline(struct SocketInfo sock_info, char *req)
+{
+  TSError ret = TS_ERR_OKAY;
+
+  if (!req) {
+    ret = send_reply(sock_info, TS_ERR_PARAMS);
+    return ret;                 // missing request: TS_ERR_PARAMS was sent, return the send status
+  }
+  // forward to server
+  lmgmt->signalEvent(MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE, req);
+  ret = send_reply(sock_info, ret);
+  return ret;
+}
+
+/**************************************************************************
  * handle_event_resolve
  *
  * purpose: handles request to resolve an event

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/TSControlMain.h
----------------------------------------------------------------------
diff --git a/mgmt/api/TSControlMain.h b/mgmt/api/TSControlMain.h
index 4be762d..397b8bb 100644
--- a/mgmt/api/TSControlMain.h
+++ b/mgmt/api/TSControlMain.h
@@ -58,6 +58,7 @@ TSError handle_proxy_state_get(struct SocketInfo sock_info);
 TSError handle_proxy_state_set(struct SocketInfo sock_info, char *req);
 TSError handle_reconfigure(struct SocketInfo sock_info);
 TSError handle_restart(struct SocketInfo sock_info, char *req, bool bounce);
+TSError handle_storage_device_cmd_offline(struct SocketInfo sock_info, char *req);
 
 TSError handle_event_resolve(struct SocketInfo sock_info, char *req);
 TSError handle_event_get_mlt(struct SocketInfo sock_info);

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/mgmt/api/include/mgmtapi.h
----------------------------------------------------------------------
diff --git a/mgmt/api/include/mgmtapi.h b/mgmt/api/include/mgmtapi.h
index c562733..7b962a6 100644
--- a/mgmt/api/include/mgmtapi.h
+++ b/mgmt/api/include/mgmtapi.h
@@ -1026,6 +1026,12 @@ extern "C"
  */
   tsapi TSError TSBounce(bool cluster);
 
+/* TSStorageDeviceCmdOffline: Request that a storage device be taken offline.
+ * @arg dev Target device, specified by path to device.
+ * @return TS_ERR_OKAY on success, otherwise an error code.
+ */
+  tsapi TSError TSStorageDeviceCmdOffline(char const* dev);
+
 /*--- diags output operations ---------------------------------------------*/
 /* TSDiags: enables users to manipulate run-time diagnostics, and print
  *           user-formatted notices, warnings and errors

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/f8cb3361/proxy/Main.cc
----------------------------------------------------------------------
diff --git a/proxy/Main.cc b/proxy/Main.cc
index d715319..a48e48a 100644
--- a/proxy/Main.cc
+++ b/proxy/Main.cc
@@ -117,6 +117,7 @@ extern "C" int plock(int);
 static const long MAX_LOGIN =  sysconf(_SC_LOGIN_NAME_MAX) <= 0 ? _POSIX_LOGIN_NAME_MAX :  sysconf(_SC_LOGIN_NAME_MAX);
 
 static void * mgmt_restart_shutdown_callback(void *, char *, int data_len);
+static void*  mgmt_storage_device_cmd_callback(void* x, char* data, int len);
 
 static int version_flag = DEFAULT_VERSION_FLAG;
 
@@ -1624,6 +1625,11 @@ main(int /* argc ATS_UNUSED */, char **argv)
     pmgmt->registerMgmtCallback(MGMT_EVENT_SHUTDOWN, mgmt_restart_shutdown_callback, NULL);
     pmgmt->registerMgmtCallback(MGMT_EVENT_RESTART, mgmt_restart_shutdown_callback, NULL);
 
+    // Callback for various storage commands. These all go to the same function so we
+    // pass the event code along so it can do the right thing. We cast that to <int> first
+    // just to be safe because the value is a #define, not a typed value.
+    pmgmt->registerMgmtCallback(MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE, mgmt_storage_device_cmd_callback, reinterpret_cast<void*>(static_cast<int>(MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE)));
+
     // The main thread also becomes a net thread.
     ink_set_thread_name("[ET_NET 0]");
 
@@ -1670,3 +1676,22 @@ mgmt_restart_shutdown_callback(void *, char *, int /* data_len ATS_UNUSED */)
   sync_cache_dir_on_shutdown();
   return NULL;
 }
+
+static void*
+mgmt_storage_device_cmd_callback(void* data, char* arg, int len)
+{
+  // arg is the name of the device to control (len bytes long)
+  CacheDisk* d = cacheProcessor.find_by_path(arg, len);
+  // Actual command is in @a data.
+  intptr_t cmd = reinterpret_cast<intptr_t>(data);
+
+  if (d) {
+    switch (cmd) {
+    case MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE:
+      Debug("server", "Marking %.*s offline", len, arg);
+      cacheProcessor.mark_storage_offline(d);
+      break;
+    }
+  }
+  return NULL;
+}