You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by jp...@apache.org on 2010/09/10 21:00:42 UTC

svn commit: r995934 - in /trafficserver/traffic/trunk/iocore/cache: Cache.cc CacheDir.cc CacheDisk.cc CachePagesInternal.cc CachePart.cc CacheWrite.cc I_CacheDefs.h I_Store.h P_CacheDir.h P_CacheDisk.h P_CacheInternal.h P_CachePart.h Store.cc

Author: jplevyak
Date: Fri Sep 10 19:00:41 2010
New Revision: 995934

URL: http://svn.apache.org/viewvc?rev=995934&view=rev
Log:
TS-43: Add support for hardware sector sizes from 512 to 8192 bytes (e.g. 4096, the new standard).
The sector size is autodetected on Linux raw drives (other OSs are not yet supported), and a new config has been added:
   CONFIG proxy.config.cache.force_sector_size INT
Use it to force a sector size for ALL disks.
This check-in changes the CACHE_DB and CACHE_DIR versions, which means that the
cache will be cleared.

Modified:
    trafficserver/traffic/trunk/iocore/cache/Cache.cc
    trafficserver/traffic/trunk/iocore/cache/CacheDir.cc
    trafficserver/traffic/trunk/iocore/cache/CacheDisk.cc
    trafficserver/traffic/trunk/iocore/cache/CachePagesInternal.cc
    trafficserver/traffic/trunk/iocore/cache/CachePart.cc
    trafficserver/traffic/trunk/iocore/cache/CacheWrite.cc
    trafficserver/traffic/trunk/iocore/cache/I_CacheDefs.h
    trafficserver/traffic/trunk/iocore/cache/I_Store.h
    trafficserver/traffic/trunk/iocore/cache/P_CacheDir.h
    trafficserver/traffic/trunk/iocore/cache/P_CacheDisk.h
    trafficserver/traffic/trunk/iocore/cache/P_CacheInternal.h
    trafficserver/traffic/trunk/iocore/cache/P_CachePart.h
    trafficserver/traffic/trunk/iocore/cache/Store.cc

Modified: trafficserver/traffic/trunk/iocore/cache/Cache.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/Cache.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/Cache.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/Cache.cc Fri Sep 10 19:00:41 2010
@@ -73,6 +73,7 @@ int cache_config_agg_write_backlog = 524
 int cache_config_hit_evacuate_percent = 10;
 int cache_config_hit_evacuate_size_limit = 0;
 #endif
+int cache_config_force_sector_size = 0;
 int cache_config_enable_checksum = 0;
 int cache_config_alt_rewrite_max_size = 4096;
 int cache_config_read_while_writer = 0;
@@ -118,8 +119,8 @@ struct PartInitInfo
   PartInitInfo()
   {
     recover_pos = 0;
-    if ((part_h_f = (char *) valloc(4 * INK_BLOCK_SIZE)) != NULL)
-      memset(part_h_f, 0, 4 * INK_BLOCK_SIZE);
+    if ((part_h_f = (char *) valloc(4 * STORE_BLOCK_SIZE)) != NULL)
+      memset(part_h_f, 0, 4 * STORE_BLOCK_SIZE);
   }
   ~PartInitInfo()
   {
@@ -538,7 +539,12 @@ CacheProcessor::start_internal(int flags
       if (diskok) {
         gdisks[gndisks] = NEW(new CacheDisk());
         Debug("cache_hosting", "Disk: %d, blocks: %d", gndisks, blocks);
-        gdisks[gndisks]->open(path, blocks, offset, fd, clear);
+        int sector_size = sd->hw_sector_size;
+        if (sector_size < cache_config_force_sector_size)
+          sector_size = cache_config_force_sector_size;
+        if (sd->hw_sector_size <= 0 || sector_size > STORE_BLOCK_SIZE)
+          Error("bad hardware sector size");
+        gdisks[gndisks]->open(path, blocks, offset, sector_size, fd, clear);
         gndisks++;
       }
     } else
@@ -868,14 +874,14 @@ Part::db_check(bool fix)
   (void) fix;
   char tt[256];
   printf("    Data for [%s]\n", hash_id);
-  printf("        Blocks:          %d\n", (int) (len / INK_BLOCK_SIZE));
-  printf("        Write Position:  %d\n", (int) ((header->write_pos - skip) / INK_BLOCK_SIZE));
-  printf("        Phase:           %d\n", (int) !!header->phase);
+  printf("        Length:          %lld\n", (uint64)len);
+  printf("        Write Position:  %lld\n", (uint64) (header->write_pos - skip));
+  printf("        Phase:           %d\n", (int)!!header->phase);
   ink_ctime_r(&header->create_time, tt);
   tt[strlen(tt) - 1] = 0;
   printf("        Create Time:     %s\n", tt);
-  printf("        Sync Serial:     %d\n", (int) header->sync_serial);
-  printf("        Write Serial:    %d\n", (int) header->write_serial);
+  printf("        Sync Serial:     %u\n", (unsigned int)header->sync_serial);
+  printf("        Write Serial:    %u\n", (unsigned int)header->write_serial);
   printf("\n");
 
   return 0;
@@ -930,6 +936,7 @@ part_clear_init(Part *d)
   d->header->cycle = 0;
   d->header->create_time = time(NULL);
   d->header->dirty = 0;
+  d->sector_size = d->header->sector_size = d->disk->hw_sector_size;
   *d->footer = *d->header;
 }
 
@@ -968,13 +975,14 @@ Part::clear_dir()
 int
 Part::init(char *s, off_t blocks, off_t dir_skip, bool clear)
 {
-  dir_skip = ROUND_TO_BLOCK((dir_skip < START_POS ? START_POS : dir_skip));
+  dir_skip = ROUND_TO_STORE_BLOCK((dir_skip < START_POS ? START_POS : dir_skip));
   path = strdup(s);
   const size_t hash_id_size = strlen(s) + 32;
   hash_id = (char *) malloc(hash_id_size);
   ink_strncpy(hash_id, s, hash_id_size);
   const size_t s_size = strlen(s);
-  snprintf(hash_id + s_size, (hash_id_size - s_size), " %d:%d", (int) (dir_skip / INK_BLOCK_SIZE), (int) blocks);
+  snprintf(hash_id + s_size, (hash_id_size - s_size), " %lld:%lld", 
+           (uint64)dir_skip, (uint64)blocks);
   hash_id_md5.encodeBuffer(hash_id, strlen(hash_id));
   len = blocks * STORE_BLOCK_SIZE;
   ink_assert(len <= MAX_PART_SIZE);
@@ -985,7 +993,7 @@ Part::init(char *s, off_t blocks, off_t 
   // successive approximation, directory/meta data eats up some storage
   start = dir_skip;
   part_init_data(this);
-  data_blocks = (len - (start - skip)) / INK_BLOCK_SIZE;
+  data_blocks = (len - (start - skip)) / STORE_BLOCK_SIZE;
 #ifdef HIT_EVACUATE
   hit_evacuate_window = (data_blocks * cache_config_hit_evacuate_percent) / 100;
 #endif
@@ -1013,7 +1021,7 @@ Part::init(char *s, off_t blocks, off_t 
 
   dir = (Dir *) (raw_dir + part_headerlen(this));
   header = (PartHeaderFooter *) raw_dir;
-  footer = (PartHeaderFooter *) (raw_dir + part_dirlen(this) - ROUND_TO_BLOCK(sizeof(PartHeaderFooter)));
+  footer = (PartHeaderFooter *) (raw_dir + part_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter)));
 
   if (clear) {
     Note("clearing cache directory '%s'", hash_id);
@@ -1021,7 +1029,7 @@ Part::init(char *s, off_t blocks, off_t 
   }
 
   init_info = new PartInitInfo();
-  int footerlen = ROUND_TO_BLOCK(sizeof(PartHeaderFooter));
+  int footerlen = ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter));
   off_t footer_offset = part_dirlen(this) - footerlen;
   // try A
   off_t as = skip;
@@ -1037,7 +1045,7 @@ Part::init(char *s, off_t blocks, off_t 
   for (i = 0; i < 4; i++) {
     AIOCallback *aio = &(init_info->part_aio[i]);
     aio->aiocb.aio_fildes = fd;
-    aio->aiocb.aio_buf = &(init_info->part_h_f[i * INK_BLOCK_SIZE]);
+    aio->aiocb.aio_buf = &(init_info->part_h_f[i * STORE_BLOCK_SIZE]);
     aio->aiocb.aio_nbytes = footerlen;
     aio->action = this;
     aio->thread = this_ethread();
@@ -1065,7 +1073,7 @@ Part::handle_dir_clear(int event, void *
       /* clear the header for directory B. We don't need to clear the
          whole of directory B. The header for directory B starts at
          skip + len */
-      op->aiocb.aio_nbytes = ROUND_TO_BLOCK(sizeof(PartHeaderFooter));
+      op->aiocb.aio_nbytes = ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter));
       op->aiocb.aio_offset = skip + dir_len;
       ink_assert(ink_aio_write(op));
       return EVENT_DONE;
@@ -1097,7 +1105,7 @@ Part::handle_dir_read(int event, void *d
     return EVENT_DONE;
   }
   CHECK_DIR(this);
-
+  sector_size = header->sector_size;
   SET_HANDLER(&Part::handle_recover_from_data);
   return handle_recover_from_data(EVENT_IMMEDIATE, 0);
 }
@@ -1142,7 +1150,7 @@ int
 Part::handle_recover_from_data(int event, void *data)
 {
   (void) data;
-  int got_len = 0;
+  uint32 got_len = 0;
   uint32 max_sync_serial = header->sync_serial;
   char *s, *e;
   if (event == EVENT_IMMEDIATE) {
@@ -1180,9 +1188,9 @@ Part::handle_recover_from_data(int event
          were written to just before syncing the directory) and make sure
          that all documents have write_serial <= header->write_serial.
        */
-      int to_check = header->write_pos - header->last_write_pos;
-      ink_assert(to_check && to_check < (int) io.aiocb.aio_nbytes);
-      int done = 0;
+      uint32 to_check = header->write_pos - header->last_write_pos;
+      ink_assert(to_check && to_check < (uint32)io.aiocb.aio_nbytes);
+      uint32 done = 0;
       s = (char *) io.aiocb.aio_buf;
       while (done < to_check) {
         Doc *doc = (Doc *) (s + done);
@@ -1362,7 +1370,7 @@ Ldone:{
       aio->thread = AIO_CALLBACK_THREAD_ANY;
       aio->then = (i < 2) ? &(init_info->part_aio[i + 1]) : 0;
     }
-    int footerlen = ROUND_TO_BLOCK(sizeof(PartHeaderFooter));
+    int footerlen = ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter));
     int dirlen = part_dirlen(this);
     int B = header->sync_serial & 1;
     off_t ss = skip + (B ? dirlen : 0);
@@ -1507,7 +1515,7 @@ build_part_hash_table(CacheHostRecord *c
     }
     mapping[map] = i;
     p[map++] = cp->parts[i];
-    total += (cp->parts[i]->len >> INK_BLOCK_SHIFT);
+    total += (cp->parts[i]->len >> STORE_BLOCK_SHIFT);
   }
 
   num_parts -= bad_parts;
@@ -1529,7 +1537,7 @@ build_part_hash_table(CacheHostRecord *c
   unsigned short *ttable = (unsigned short *) xmalloc(sizeof(unsigned short) * PART_HASH_TABLE_SIZE);
 
   for (i = 0; i < num_parts; i++) {
-    forpart[i] = (PART_HASH_TABLE_SIZE * (p[i]->len >> INK_BLOCK_SHIFT)) / total;
+    forpart[i] = (PART_HASH_TABLE_SIZE * (p[i]->len >> STORE_BLOCK_SHIFT)) / total;
     used += forpart[i];
   }
   // spread around the excess
@@ -2680,6 +2688,8 @@ ink_cache_init(ModuleVersion v)
   IOCORE_EstablishStaticConfigInt32(cache_config_hit_evacuate_size_limit, "proxy.config.cache.hit_evacuate_size_limit");
   Debug("cache_init", "proxy.config.cache.hit_evacuate_size_limit = %d", cache_config_hit_evacuate_size_limit);
 #endif
+  IOCORE_RegisterConfigInteger(RECT_CONFIG, "proxy.config.cache.force_sector_size", 0, RECU_DYNAMIC, RECC_NULL, NULL);
+  IOCORE_EstablishStaticConfigInt32(cache_config_force_sector_size, "proxy.config.cache.force_sector_size");
 #ifdef HTTP_CACHE
   extern int url_hash_method;
   IOCORE_RegisterConfigInteger(RECT_CONFIG, "proxy.config.cache.url_hash_method", 1, RECU_RESTART_TS, RECC_NULL, NULL);

Modified: trafficserver/traffic/trunk/iocore/cache/CacheDir.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/CacheDir.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/CacheDir.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/CacheDir.cc Fri Sep 10 19:00:41 2010
@@ -538,7 +538,7 @@ Lagain:
           DDebug("dir_probe_hit", "found %X %X part %d bucket %d  boffset %d", key->word(0), key->word(1), d->fd, b, (int) dir_offset(e));
           dir_assign(result, e);
           *last_collision = e;
-          ink_assert(dir_offset(e) * INK_BLOCK_SIZE < d->len);
+          ink_assert(dir_offset(e) * CACHE_BLOCK_SIZE < d->len);
           return 1;
         } else {                // delete the invalid entry
           CACHE_DEC_DIR_USED(d->mutex);
@@ -905,8 +905,6 @@ sync_cache_dir_on_shutdown(void)
       Debug("cache_dir_sync", "Dir %s: ignoring -- bad disk", d->hash_id);
       continue;
     }
-    // Unused variable.
-    // int headerlen = ROUND_TO_BLOCK(sizeof(PartHeaderFooter));
     int dirlen = part_dirlen(d);
     if (!d->header->dirty && !d->dir_sync_in_progress) {
       Debug("cache_dir_sync", "Dir %s: ignoring -- not dirty", d->hash_id);
@@ -1020,7 +1018,7 @@ Lrestart:
     if (DISK_BAD(d->disk))
       goto Ldone;
 
-    int headerlen = ROUND_TO_BLOCK(sizeof(PartHeaderFooter));
+    int headerlen = ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter));
     int dirlen = part_dirlen(d);
     if (!writepos) {
       // start

Modified: trafficserver/traffic/trunk/iocore/cache/CacheDisk.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/CacheDisk.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/CacheDisk.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/CacheDisk.cc Fri Sep 10 19:00:41 2010
@@ -25,11 +25,12 @@
 
 
 int
-CacheDisk::open(char *s, off_t blocks, off_t dir_skip, int fildes, bool clear)
+CacheDisk::open(char *s, off_t blocks, off_t dir_skip, int ahw_sector_size, int fildes, bool clear)
 {
   path = xstrdup(s);
+  hw_sector_size = ahw_sector_size;
   fd = fildes;
-  skip = ROUND_TO_BLOCK((dir_skip < START_POS ? START_POS : dir_skip));
+  skip = ROUND_TO_STORE_BLOCK((dir_skip < START_POS ? START_POS : dir_skip));
   start_offset = dir_skip;
   start = skip;
   /* we can't use fractions of store blocks. */
@@ -51,7 +52,7 @@ CacheDisk::open(char *s, off_t blocks, o
   disk_parts = (DiskPart **) xmalloc((l / MIN_PART_SIZE + 1) * sizeof(DiskPart **));
 
   memset(disk_parts, 0, (l / MIN_PART_SIZE + 1) * sizeof(DiskPart **));
-  header_len = ROUND_TO_BLOCK(header_len);
+  header_len = ROUND_TO_STORE_BLOCK(header_len);
   start = skip + header_len;
   num_usable_blocks = (off_t(len * STORE_BLOCK_SIZE) - (start - start_offset)) >> STORE_BLOCK_SHIFT;
 

Modified: trafficserver/traffic/trunk/iocore/cache/CachePagesInternal.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/CachePagesInternal.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/CachePagesInternal.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/CachePagesInternal.cc Fri Sep 10 19:00:41 2010
@@ -271,9 +271,9 @@ ShowCacheInternal::showPartPartitions(in
   for (c = p->agg.head; c; c = (CacheVC *) c->link.next)
     agg_todo++;
   CHECK_SHOW(show("<tr>" "<td>%s</td>"  // ID
-                  "<td>%d</td>" // blocks
+                  "<td>%lld</td>" // blocks
                   "<td>%lld</td>" // directory entries
-                  "<td>%d</td>" // write position
+                  "<td>%lld</td>" // write position
                   "<td>%d</td>" // write agg to do
                   "<td>%d</td>" // write agg to do size
                   "<td>%d</td>" // write agg done
@@ -283,9 +283,9 @@ ShowCacheInternal::showPartPartitions(in
                   "<td>%u</td>" // write serial
                   "</tr>\n",
                   p->hash_id,
-                  (int) ((p->len - (p->start - p->skip)) / INK_BLOCK_SIZE),
+                  (uint64)((p->len - (p->start - p->skip)) / CACHE_BLOCK_SIZE),
                   (uint64)(p->buckets * DIR_DEPTH * p->segments),
-                  (int) ((p->header->write_pos - p->start) / INK_BLOCK_SIZE),
+                  (uint64)((p->header->write_pos - p->start) / CACHE_BLOCK_SIZE),
                   agg_todo,
                   p->agg_todo_size,
                   agg_done, p->header->phase, ctime, p->header->sync_serial, p->header->write_serial));

Modified: trafficserver/traffic/trunk/iocore/cache/CachePart.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/CachePart.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/CachePart.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/CachePart.cc Fri Sep 10 19:00:41 2010
@@ -140,7 +140,12 @@ CacheVC::scanObject(int event, Event * e
     int i;
     bool changed;
 
-    if (doc->magic != DOC_MAGIC || doc->ftype != CACHE_FRAG_TYPE_HTTP || !doc->hlen)
+    if (doc->magic != DOC_MAGIC) {
+      doc = (Doc *)((char *) doc + CACHE_BLOCK_SIZE);
+      continue;
+    }
+      
+    if (doc->ftype != CACHE_FRAG_TYPE_HTTP || !doc->hlen)
       goto Lskip;
 
     last_collision = NULL;
@@ -238,11 +243,11 @@ CacheVC::scanObject(int event, Event * e
         return scanOpenWrite(EVENT_NONE, 0);
       }
     }
-    doc = (Doc *) ((char *) doc + round_to_approx_size(doc->len));
+    doc = (Doc *) ((char *) doc + part->round_to_approx_size(doc->len));
     continue;
   Lskip:
 #endif
-    doc = (Doc *) ((char *) doc + INK_BLOCK_SIZE);
+    doc = (Doc *) ((char *) doc + part->round_to_approx_size(doc->len));
   }
 #ifdef HTTP_CACHE
   vector.clear();
@@ -335,7 +340,7 @@ CacheVC::scanOpenWrite(int event, Event 
     Dir *l = NULL;
     Dir d;
     Doc *doc = (Doc *) (buf->data() + offset);
-    offset = (char *) doc - buf->data() + round_to_approx_size(doc->len);
+    offset = (char *) doc - buf->data() + part->round_to_approx_size(doc->len);
     // if the doc contains some data, then we need to create
     // a new directory entry for this fragment. Remember the
     // offset and the key in earliest_key

Modified: trafficserver/traffic/trunk/iocore/cache/CacheWrite.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/CacheWrite.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/CacheWrite.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/CacheWrite.cc Fri Sep 10 19:00:41 2010
@@ -185,7 +185,7 @@ CacheVC::handleWrite(int event, Event *e
     frag_len = 0;
   set_agg_write_in_progress();
   POP_HANDLER;
-  agg_len = round_to_approx_size(write_len + header_len + frag_len + sizeofDoc);
+  agg_len = part->round_to_approx_size(write_len + header_len + frag_len + sizeofDoc);
   part->agg_todo_size += agg_len;
   bool agg_error =
     (agg_len > AGG_SIZE || header_len + sizeofDoc > MAX_FRAG_SIZE ||
@@ -335,8 +335,8 @@ Part::aggWriteDone(int event, Event *e)
     Debug("cache_disk_error", "Write error on disk %s\n \
               write range : [%llu - %llu bytes]  [%llu - %llu blocks] \n",
           hash_id, io.aiocb.aio_offset, io.aiocb.aio_offset + io.aiocb.aio_nbytes,
-          io.aiocb.aio_offset / INK_BLOCK_SIZE,
-          (io.aiocb.aio_offset + io.aiocb.aio_nbytes) / INK_BLOCK_SIZE);
+          io.aiocb.aio_offset / CACHE_BLOCK_SIZE,
+          (io.aiocb.aio_offset + io.aiocb.aio_nbytes) / CACHE_BLOCK_SIZE);
     Dir del_dir;
     dir_clear(&del_dir);
     for (int done = 0; done < agg_buf_pos;) {
@@ -591,7 +591,7 @@ Part::evacuateWrite(CacheVC *evacuator, 
 
   // push to front of aggregation write list, so it is written first
 
-  evacuator->agg_len = round_to_approx_size(((Doc *) evacuator->buf->data())->len);
+  evacuator->agg_len = round_to_approx_size(((Doc *)evacuator->buf->data())->len);
   agg_todo_size += evacuator->agg_len;
   /* insert the evacuator after all the other evacuators */
   CacheVC *cur = (CacheVC *) agg.head;
@@ -747,7 +747,7 @@ agg_copy(char *p, CacheVC *vc)
 
     uint32 len = vc->write_len + vc->header_len + vc->frag_len + sizeofDoc;
     ink_assert(vc->frag_type != CACHE_FRAG_TYPE_HTTP || len != sizeofDoc);
-    ink_debug_assert(round_to_approx_size(len) == vc->agg_len);
+    ink_debug_assert(part->round_to_approx_size(len) == vc->agg_len);
     // update copy of directory entry for this document
     dir_set_approx_size(&vc->dir, vc->agg_len);
     dir_set_offset(&vc->dir, offset_to_part_offset(part, o));
@@ -862,7 +862,7 @@ agg_copy(char *p, CacheVC *vc)
   } else {
     // for evacuated documents, copy the data, and update directory
     Doc *doc = (Doc *) vc->buf->data();
-    int l = round_to_approx_size(doc->len);
+    int l = vc->part->round_to_approx_size(doc->len);
     {
       ProxyMutex RELEASE_UNUSED *mutex = vc->part->mutex;
       ink_debug_assert(mutex->thread_holding == this_ethread());
@@ -906,7 +906,7 @@ Part::evacuate_cleanup_blocks(int i)
 void
 Part::evacuate_cleanup()
 {
-  int64 eo = ((header->write_pos - start) / INK_BLOCK_SIZE) + 1;
+  int64 eo = ((header->write_pos - start) / CACHE_BLOCK_SIZE) + 1;
   int64 e = dir_offset_evac_bucket(eo);
   int64 sx = e - (evacuate_size / PIN_SCAN_EVERY) - 1;
   int64 s = sx;

Modified: trafficserver/traffic/trunk/iocore/cache/I_CacheDefs.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/I_CacheDefs.h?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/I_CacheDefs.h (original)
+++ trafficserver/traffic/trunk/iocore/cache/I_CacheDefs.h Fri Sep 10 19:00:41 2010
@@ -33,10 +33,10 @@
 #define CACHE_ALT_INDEX_DEFAULT     -1
 #define CACHE_ALT_REMOVED           -2
 
-#define CACHE_DB_MAJOR_VERSION      20
+#define CACHE_DB_MAJOR_VERSION      21
 #define CACHE_DB_MINOR_VERSION      0
 
-#define CACHE_DIR_MAJOR_VERSION     17
+#define CACHE_DIR_MAJOR_VERSION     18
 #define CACHE_DIR_MINOR_VERSION     0
 
 #define CACHE_DB_FDS                128

Modified: trafficserver/traffic/trunk/iocore/cache/I_Store.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/I_Store.h?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/I_Store.h (original)
+++ trafficserver/traffic/trunk/iocore/cache/I_Store.h Fri Sep 10 19:00:41 2010
@@ -33,22 +33,19 @@
 
 #include "inktomi++.h"
 
-// ideally to match the system
-// should be the element of disk write atomicity
-// Many structures the cache will have to be changed if this constant changes
-# define STORE_BLOCK_SIZE       8192
-# define STORE_BLOCK_SHIFT      13
+#define STORE_BLOCK_SIZE       8192
+#define STORE_BLOCK_SHIFT      13
+#define DEFAULT_HW_SECTOR_SIZE 512
 
 //
 // A Store is a place to store data.
 // Those on the same disk should be in a linked list.
 //
-
 struct Span
 {
   char *pathname;
   int64 blocks;
-  int64 disk_block_size;
+  int64 hw_sector_size;
   bool file_pathname;           // the pathname is a file
   bool isRaw;
   int64 offset;                 // used only if (file == true)
@@ -58,37 +55,26 @@ struct Span
 private:
     bool is_mmapable_internal;
 public:
-    bool is_mmapable()
-  {
-    return is_mmapable_internal;
-  }
-  void set_mmapable(bool s)
-  {
-    is_mmapable_internal = s;
-  }
-  int64 size()
-  {
-    return blocks * STORE_BLOCK_SIZE;
-  }
+  bool is_mmapable() { return is_mmapable_internal; }
+  void set_mmapable(bool s) { is_mmapable_internal = s; }
+  int64 size() { return blocks * STORE_BLOCK_SIZE; }
 
-  int64 total_blocks()
-  {
+  int64 total_blocks() {
     if (link.next) {
       return blocks + link.next->total_blocks();
     } else {
       return blocks;
     }
   }
-  Span *nth(int i)
-  {
+
+  Span *nth(int i) {
     Span *x = this;
     while (x && i--)
       x = x->link.next;
     return x;
   }
 
-  int paths()
-  {
+  int paths() {
     int i = 0;
     for (Span * x = this; x; i++, x = x->link.next);
     return i;
@@ -97,22 +83,16 @@ public:
   int read(int fd);
 
   Span *dup();
-  int64 end()
-  {
-    return offset + blocks;
-  }
+  int64 end() { return offset + blocks; }
 
   const char *init(char *n, int64 size);
 
+  // 0 on success -1 on failure
   int path(char *filename,      // for non-file, the filename in the director
            int64 * offset,      // for file, start offset (unsupported)
            char *buf, int buflen);      // where to store the path
 
-  // 0 on success -1 on failure
-  // these operations are NOT thread-safe
-  //
-
-Span():pathname(NULL), blocks(0), disk_block_size(512), file_pathname(false),
+Span():pathname(NULL), blocks(0), hw_sector_size(DEFAULT_HW_SECTOR_SIZE), file_pathname(false),
     isRaw(true), offset(0), disk_id(0), is_mmapable_internal(false) {
   }
   ~Span();
@@ -122,16 +102,14 @@ struct Store
 {
   //
   // Public Interface
-  //
-
   // Thread-safe operations
+  //
 
   // spread evenly on all disks
   void spread_alloc(Store & s, unsigned int blocks, bool mmapable = true);
   void alloc(Store & s, unsigned int blocks, bool only_one = false, bool mmapable = true);
 
-  Span *alloc_one(unsigned int blocks, bool mmapable)
-  {
+  Span *alloc_one(unsigned int blocks, bool mmapable) {
     Store s;
       alloc(s, blocks, true, mmapable);
     if (s.n_disks)
@@ -185,8 +163,6 @@ struct Store
   int n_disks;
   Span **disk;
 
-  // 0 on success -1 on failure
-  // these operations are NOT thread-safe
   //
   // returns NULL on success
   // if fd >= 0 then on failure it returns an error string
@@ -205,12 +181,10 @@ void stealStore(Store & s, int blocks);
 
 int initialize_store();
 
-struct storageConfigFile
-{
-  const char *parseFile(int fd)
-  {
+struct storageConfigFile {
+  const char *parseFile(int fd) {
     Store tStore;
-      return tStore.read_config(fd);
+    return tStore.read_config(fd);
   }
 };
 

Modified: trafficserver/traffic/trunk/iocore/cache/P_CacheDir.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/P_CacheDir.h?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/P_CacheDir.h (original)
+++ trafficserver/traffic/trunk/iocore/cache/P_CacheDir.h Fri Sep 10 19:00:41 2010
@@ -46,7 +46,7 @@ struct CacheVC;
 #define DIR_SIZE_WIDTH                  6
 #define DIR_BLOCK_SIZES                 4
 #define DIR_BLOCK_SHIFT(_i)             (3*(_i))
-#define DIR_BLOCK_SIZE(_i)              (INK_BLOCK_SIZE << DIR_BLOCK_SHIFT(_i))
+#define DIR_BLOCK_SIZE(_i)              (CACHE_BLOCK_SIZE << DIR_BLOCK_SHIFT(_i))
 #define DIR_SIZE_WITH_BLOCK(_i)         ((1<<DIR_SIZE_WIDTH) * DIR_BLOCK_SIZE(_i))
 #define DIR_OFFSET_BITS                 40
 #define DIR_OFFSET_MAX                  ((((off_t)1) << DIR_OFFSET_BITS) - 1)
@@ -167,10 +167,10 @@ struct FreeDir
   }                                                              \
 } while (0)
 #define dir_approx_size(_e) ((dir_size(_e) + 1) * DIR_BLOCK_SIZE(dir_big(_e)))
-#define round_to_approx_size(_s) (_s <= DIR_SIZE_WITH_BLOCK(0) ? ROUND_TO(_s, DIR_BLOCK_SIZE(0)) : \
-                                  (_s <= DIR_SIZE_WITH_BLOCK(1) ? ROUND_TO(_s, DIR_BLOCK_SIZE(1)) : \
-                                   (_s <= DIR_SIZE_WITH_BLOCK(2) ? ROUND_TO(_s, DIR_BLOCK_SIZE(2)) : \
-                                    ROUND_TO(_s, DIR_BLOCK_SIZE(3)))))
+#define round_to_approx_dir_size(_s) (_s <= DIR_SIZE_WITH_BLOCK(0) ? ROUND_TO(_s, DIR_BLOCK_SIZE(0)) : \
+                                      (_s <= DIR_SIZE_WITH_BLOCK(1) ? ROUND_TO(_s, DIR_BLOCK_SIZE(1)) : \
+                                       (_s <= DIR_SIZE_WITH_BLOCK(2) ? ROUND_TO(_s, DIR_BLOCK_SIZE(2)) : \
+                                        ROUND_TO(_s, DIR_BLOCK_SIZE(3)))))
 #define dir_tag(_e) ((uint32)((_e)->w[2]&((1<<DIR_TAG_WIDTH)-1)))
 #define dir_set_tag(_e,_t) (_e)->w[2] = (uint16)(((_e)->w[2]&~((1<<DIR_TAG_WIDTH)-1)) | ((_t)&((1<<DIR_TAG_WIDTH)-1)))
 #define dir_phase(_e) dir_bit(_e,2,12)

Modified: trafficserver/traffic/trunk/iocore/cache/P_CacheDisk.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/P_CacheDisk.h?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/P_CacheDisk.h (original)
+++ trafficserver/traffic/trunk/iocore/cache/P_CacheDisk.h Fri Sep 10 19:00:41 2010
@@ -98,6 +98,7 @@ struct CacheDisk:public Continuation
   off_t skip;
   off_t start_offset;
   int num_usable_blocks;
+  int hw_sector_size;
   int fd;
   off_t free_space;
   off_t wasted_space;
@@ -115,28 +116,17 @@ struct CacheDisk:public Continuation
   }
 
    ~CacheDisk();
-  int open(char *s, off_t blocks, off_t dir_skip, int fildes, bool clear);
-
+  int open(char *s, off_t blocks, off_t dir_skip, int hw_sector_size, int fildes, bool clear);
   int clearDisk();
-
   int clearDone(int event, void *data);
-
   int openStart(int event, void *data);
-
   int openDone(int event, void *data);
-
   int sync();
-
   int syncDone(int event, void *data);
-
   DiskPartBlock *create_partition(int number, off_t size, int scheme);
-
   int delete_partition(int number);
-
   int delete_all_partitions();
-
   void update_header();
-
   DiskPart *get_diskpart(int part_number);
 
 };

Modified: trafficserver/traffic/trunk/iocore/cache/P_CacheInternal.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/P_CacheInternal.h?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/P_CacheInternal.h (original)
+++ trafficserver/traffic/trunk/iocore/cache/P_CacheInternal.h Fri Sep 10 19:00:41 2010
@@ -226,6 +226,7 @@ extern int cache_config_ram_cache_compre
 extern int cache_config_hit_evacuate_percent;
 extern int cache_config_hit_evacuate_size_limit;
 #endif
+extern int cache_config_force_sector_size;
 
 // CacheVC
 struct CacheVC: public CacheVConnection
@@ -995,8 +996,6 @@ struct Cache
 extern Cache *theCache;
 extern Cache *theStreamCache;
 inkcoreapi extern Cache *caches[NUM_CACHE_FRAG_TYPES];
-extern int cache_config_vary_on_user_agent;
-
 
 #ifdef HTTP_CACHE
 TS_INLINE Action *

Modified: trafficserver/traffic/trunk/iocore/cache/P_CachePart.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/P_CachePart.h?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/P_CachePart.h (original)
+++ trafficserver/traffic/trunk/iocore/cache/P_CachePart.h Fri Sep 10 19:00:41 2010
@@ -25,23 +25,24 @@
 #ifndef _P_CACHE_PART_H__
 #define _P_CACHE_PART_H__
 
-#define INK_BLOCK_SHIFT                 9
-#define INK_BLOCK_SIZE                  (1<<INK_BLOCK_SHIFT)
-#define ROUND_TO_BLOCK(_x)              INK_ALIGN((_x), INK_BLOCK_SIZE)
+#define CACHE_BLOCK_SHIFT               9
+#define CACHE_BLOCK_SIZE                (1<<CACHE_BLOCK_SHIFT) // 512, smallest sector size
+#define ROUND_TO_STORE_BLOCK(_x)        INK_ALIGN((_x), STORE_BLOCK_SIZE)
+#define ROUND_TO_CACHE_BLOCK(_x)        INK_ALIGN((_x), CACHE_BLOCK_SIZE)
+#define ROUND_TO_SECTOR(_p, _x)         INK_ALIGN((_x), _p->sector_size)
 #define ROUND_TO(_x, _y)                INK_ALIGN((_x), (_y))
 
 // Part
 
 #define PART_MAGIC                      0xF1D0F00D
 #define START_BLOCKS                    32      // 8k
-#define START_POS                       ((off_t)START_BLOCKS * INK_BLOCK_SIZE)
-#define AGG_HEADER_SIZE                 INK_BLOCK_SIZE
+#define START_POS                       ((off_t)START_BLOCKS * CACHE_BLOCK_SIZE)
 #define AGG_SIZE                        (4 * 1024 * 1024) // 4MB
 #define AGG_HIGH_WATER                  (AGG_SIZE / 2) // 2MB
 #define EVACUATION_SIZE                 (2 * AGG_SIZE)  // 8MB
 #define MAX_PART_SIZE                   ((off_t)512 * 1024 * 1024 * 1024 * 1024)
-#define STORE_BLOCKS_PER_DISK_BLOCK     (STORE_BLOCK_SIZE / INK_BLOCK_SIZE)
-#define MAX_PART_BLOCKS                 (MAX_PART_SIZE / INK_BLOCK_SIZE)
+#define STORE_BLOCKS_PER_CACHE_BLOCK    (STORE_BLOCK_SIZE / CACHE_BLOCK_SIZE)
+#define MAX_PART_BLOCKS                 (MAX_PART_SIZE / CACHE_BLOCK_SIZE)
 #define TARGET_FRAG_SIZE                (DEFAULT_MAX_BUFFER_SIZE - sizeofDoc)
 #define SHRINK_TARGET_FRAG_SIZE         (DEFAULT_MAX_BUFFER_SIZE + (DEFAULT_MAX_BUFFER_SIZE/4))
 #define MAX_FRAG_SIZE                   ((256 * 1024) - sizeofDoc)
@@ -58,7 +59,7 @@
 
 
 #define dir_offset_evac_bucket(_o) \
-  (_o / (EVACUATION_BUCKET_SIZE / INK_BLOCK_SIZE))
+  (_o / (EVACUATION_BUCKET_SIZE / CACHE_BLOCK_SIZE))
 #define dir_evac_bucket(_e) dir_offset_evac_bucket(dir_offset(_e))
 #define offset_evac_bucket(_d, _o) \
   dir_offset_evac_bucket((offset_to_part_offset(_d, _o)
@@ -92,6 +93,8 @@ struct PartHeaderFooter
   uint32 sync_serial;
   uint32 write_serial;
   uint32 dirty;
+  uint32 sector_size;
+  uint32 unused;                // pad out to 8 byte boundary
   uint16 freelist[1];
 };
 
@@ -171,6 +174,7 @@ struct Part:public Continuation
   CachePart *cache_part;
   uint32 last_sync_serial;
   uint32 last_write_serial;
+  uint32 sector_size;
   bool recover_wrapped;
   bool dir_sync_waiting;
   bool dir_sync_in_progress;
@@ -227,7 +231,7 @@ struct Part:public Continuation
   {
     io.aiocb.aio_fildes = AIO_NOT_IN_PROGRESS;
   }
-
+  
   int aggWriteDone(int event, Event *e);
   int aggWrite(int event, void *e);
   void agg_wrap();
@@ -243,6 +247,7 @@ struct Part:public Continuation
   void evacuate_cleanup();
   EvacuationBlock *force_evacuate_head(Dir *dir, int pinned);
   int within_hit_evacuate_window(Dir *dir);
+  uint32 round_to_approx_size(uint32 l);
 
   Part():Continuation(new_ProxyMutex()), path(NULL), fd(-1),
          dir(0), buckets(0), recover_pos(0), prev_recover_pos(0), scan_pos(0), skip(0), start(0),
@@ -297,17 +302,17 @@ struct Frag {
 // If you change this, change sizeofDoc above
 struct Doc
 {
-  uint32 magic;                 // DOC_MAGIC
-  uint32 len;                   // length of this segment
-  uint64 total_len;             // total length of document
-  INK_MD5 first_key;            // first key in document (http: vector)
+  uint32 magic;         // DOC_MAGIC
+  uint32 len;           // length of this segment (including hlen, flen & sizeof(Doc), unrounded)
+  uint64 total_len;     // total length of document
+  INK_MD5 first_key;    // first key in document (http: vector)
   INK_MD5 key;
-  uint32 hlen;                  // header length
-  uint32 ftype:8;               // fragment type CACHE_FRAG_TYPE_XX
-  uint32 flen:24;               // fragment table length
+  uint32 hlen;          // header length
+  uint32 ftype:8;       // fragment type CACHE_FRAG_TYPE_XX
+  uint32 flen:24;       // fragment table length
   uint32 sync_serial;
   uint32 write_serial;
-  uint32 pinned;                // pinned until
+  uint32 pinned;        // pinned until
   uint32 checksum;
 
   uint32 data_len();
@@ -333,13 +338,14 @@ extern unsigned short *part_hash_table;
 
 TS_INLINE int
 part_headerlen(Part *d) {
-  return ROUND_TO_BLOCK(sizeof(PartHeaderFooter) + sizeof(uint16) * (d->segments-1));
+  return ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter) + sizeof(uint16) * (d->segments-1));
 }
 TS_INLINE int
 part_dirlen(Part * d)
 {
-  return ROUND_TO_BLOCK(d->buckets * DIR_DEPTH * d->segments * SIZEOF_DIR) +
-    part_headerlen(d) + ROUND_TO_BLOCK(sizeof(PartHeaderFooter));
+  return part_headerlen(d) + 
+    ROUND_TO_STORE_BLOCK(d->buckets * DIR_DEPTH * d->segments * SIZEOF_DIR) +
+    ROUND_TO_STORE_BLOCK(sizeof(PartHeaderFooter));
 }
 TS_INLINE int
 part_direntries(Part * d)
@@ -349,37 +355,37 @@ part_direntries(Part * d)
 TS_INLINE int
 part_out_of_phase_valid(Part * d, Dir * e)
 {
-  return (dir_offset(e) - 1 >= ((d->header->agg_pos - d->start) / INK_BLOCK_SIZE));
+  return (dir_offset(e) - 1 >= ((d->header->agg_pos - d->start) / CACHE_BLOCK_SIZE));
 }
 TS_INLINE int
 part_out_of_phase_agg_valid(Part * d, Dir * e)
 {
-  return (dir_offset(e) - 1 >= ((d->header->agg_pos - d->start + AGG_SIZE) / INK_BLOCK_SIZE));
+  return (dir_offset(e) - 1 >= ((d->header->agg_pos - d->start + AGG_SIZE) / CACHE_BLOCK_SIZE));
 }
 TS_INLINE int
 part_out_of_phase_write_valid(Part * d, Dir * e)
 {
-  return (dir_offset(e) - 1 >= ((d->header->write_pos - d->start) / INK_BLOCK_SIZE));
+  return (dir_offset(e) - 1 >= ((d->header->write_pos - d->start) / CACHE_BLOCK_SIZE));
 }
 TS_INLINE int
 part_in_phase_valid(Part * d, Dir * e)
 {
-  return (dir_offset(e) - 1 < ((d->header->write_pos + d->agg_buf_pos - d->start) / INK_BLOCK_SIZE));
+  return (dir_offset(e) - 1 < ((d->header->write_pos + d->agg_buf_pos - d->start) / CACHE_BLOCK_SIZE));
 }
 TS_INLINE off_t
 part_offset(Part * d, Dir * e)
 {
-  return d->start + (off_t) dir_offset(e) * INK_BLOCK_SIZE - INK_BLOCK_SIZE;
+  return d->start + (off_t) dir_offset(e) * CACHE_BLOCK_SIZE - CACHE_BLOCK_SIZE;
 }
 TS_INLINE off_t
 offset_to_part_offset(Part * d, off_t pos)
 {
-  return ((pos - d->start + INK_BLOCK_SIZE) / INK_BLOCK_SIZE);
+  return ((pos - d->start + CACHE_BLOCK_SIZE) / CACHE_BLOCK_SIZE);
 }
 TS_INLINE off_t
 part_offset_to_offset(Part * d, off_t pos)
 {
-  return d->start + pos * INK_BLOCK_SIZE - INK_BLOCK_SIZE;
+  return d->start + pos * CACHE_BLOCK_SIZE - CACHE_BLOCK_SIZE;
 }
 TS_INLINE Dir *
 part_dir_segment(Part * d, int s)
@@ -483,7 +489,7 @@ TS_INLINE int
 Part::within_hit_evacuate_window(Dir * xdir)
 {
   off_t oft = dir_offset(xdir) - 1;
-  off_t write_off = (header->write_pos + AGG_SIZE - start) / INK_BLOCK_SIZE;
+  off_t write_off = (header->write_pos + AGG_SIZE - start) / CACHE_BLOCK_SIZE;
   off_t delta = oft - write_off;
   if (delta >= 0)
     return delta < hit_evacuate_window;
@@ -491,4 +497,10 @@ Part::within_hit_evacuate_window(Dir * x
     return -delta > (data_blocks - hit_evacuate_window) && -delta < data_blocks;
 }
 
+TS_INLINE uint32
+Part::round_to_approx_size(uint32 l) {
+  uint32 ll = round_to_approx_dir_size(l);
+  return ROUND_TO_SECTOR(this, ll);
+}
+
 #endif /* _P_CACHE_PART_H__ */

Modified: trafficserver/traffic/trunk/iocore/cache/Store.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/iocore/cache/Store.cc?rev=995934&r1=995933&r2=995934&view=diff
==============================================================================
--- trafficserver/traffic/trunk/iocore/cache/Store.cc (original)
+++ trafficserver/traffic/trunk/iocore/cache/Store.cc Fri Sep 10 19:00:41 2010
@@ -461,7 +461,7 @@ Span::init(char *an, int64 size)
   }
 #endif
 
-  disk_block_size = fs.f_bsize;
+  hw_sector_size = fs.f_bsize;
   int64 fsize = (int64) fs.f_blocks * (int64) fs.f_bsize;
 
   switch ((s.st_mode & S_IFMT)) {
@@ -616,11 +616,16 @@ Span::init(char *filename, int64 size)
   }
   Debug("cache_init", "Span::init - socketManager.open(\"%s\", O_RDONLY) = %d", filename, fd);
 
-  if (ioctl(fd, BLKSSZGET, &arg) == 0) {
-    disk_block_size = arg;
+#ifdef BLKPBSZGET
+  if (ioctl(fd, BLKPBSZGET, &arg) == 0)
+#else
+  if (ioctl(fd, BLKSSZGET, &arg) == 0)
+#endif
+  {
+    hw_sector_size = arg;
     is_disk = 1;
-    adjusted_sec = disk_block_size / 512;
-    Debug("cache_init", "Span::init - disk_block_size = %d,is_disk = %d,adjusted_sec = %d", filename, fd, adjusted_sec);
+    adjusted_sec = hw_sector_size / 512;
+    Debug("cache_init", "Span::init - %s hw_sector_size = %d,is_disk = %d,adjusted_sec = %d", filename, hw_sector_size, is_disk,adjusted_sec);
   }
 
   if (is_disk) {
@@ -649,13 +654,13 @@ Span::init(char *filename, int64 size)
 
     blocks = heads * sectors * cylinders;
 
-    if (size > 0 && blocks * disk_block_size != size) {
+    if (size > 0 && blocks * hw_sector_size != size) {
       Warning("Warning: you specified a size of %lld for %s,\n", size, filename);
-      Warning("but the device size is %lld. Using minimum of the two.\n", blocks * disk_block_size);
-      if (blocks * disk_block_size < size)
-        size = blocks * disk_block_size;
+      Warning("but the device size is %lld. Using minimum of the two.\n", blocks * hw_sector_size);
+      if (blocks * hw_sector_size < size)
+        size = blocks * hw_sector_size;
     } else {
-      size = blocks * disk_block_size;
+      size = blocks * hw_sector_size;
     }
 
     /* I don't know why I'm redefining blocks to be something that is quite