You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by pa...@apache.org on 2018/05/03 20:03:18 UTC
[trafficserver] branch master updated: Add cache scan feature
This is an automated email from the ASF dual-hosted git repository.
paziz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 2fade4b Add cache scan feature
2fade4b is described below
commit 2fade4bc19b7a929fe8f5c4dc05dcf8e3616420e
Author: Persia Aziz <pe...@yahoo-inc.com>
AuthorDate: Mon Apr 9 10:09:47 2018 -0500
Add cache scan feature
---
cmd/traffic_cache_tool/CacheDefs.cc | 123 ++-----------
cmd/traffic_cache_tool/CacheDefs.h | 266 +++++++++++-----------------
cmd/traffic_cache_tool/CacheScan.cc | 339 ++++++++++++++++++++++++++++++++++++
cmd/traffic_cache_tool/CacheScan.h | 56 ++++++
cmd/traffic_cache_tool/CacheTool.cc | 35 ++++
cmd/traffic_cache_tool/Makefile.am | 6 +-
6 files changed, 553 insertions(+), 272 deletions(-)
diff --git a/cmd/traffic_cache_tool/CacheDefs.cc b/cmd/traffic_cache_tool/CacheDefs.cc
index ec83c67..f266fc9 100644
--- a/cmd/traffic_cache_tool/CacheDefs.cc
+++ b/cmd/traffic_cache_tool/CacheDefs.cc
@@ -283,11 +283,6 @@ Stripe::probeMeta(MemSpan &mem, StripeMeta const *base_meta)
return false;
}
-/* INK_ALIGN() is only to be used to align on a power of 2 boundary */
-#define INK_ALIGN(size, boundary) (((size) + ((boundary)-1)) & ~((boundary)-1))
-
-#define ROUND_TO_STORE_BLOCK(_x) INK_ALIGN((_x), 8192)
-
Errata
Stripe::updateHeaderFooter()
{
@@ -354,12 +349,6 @@ Stripe::updateHeaderFooter()
return zret;
}
-TS_INLINE int
-Stripe::vol_headerlen()
-{
- return ROUND_TO_STORE_BLOCK(sizeof(StripeMeta) + sizeof(uint16_t) * (this->_segments - 1));
-}
-
size_t
Stripe::vol_dirlen()
{
@@ -394,7 +383,6 @@ Stripe::updateLiveData(enum Copy c)
// int64_t n_buckets;
// int64_t n_segments;
- _content = _start;
/*
* COMMENTING THIS SECTION FOR NOW TO USE THE EXACT LOGIN USED IN ATS TO CALCULATE THE NUMBER OF SEGMENTS AND BUCKETS
// Past the header is the segment free list heads which if sufficiently long (> ~4K) can take
@@ -416,68 +404,12 @@ Stripe::updateLiveData(enum Copy c)
_directory._skip = header_len;
}
-#define dir_big(_e) ((uint32_t)((((_e)->w[1]) >> 8) & 0x3))
-#define dir_bit(_e, _w, _b) ((uint32_t)(((_e)->w[_w] >> (_b)) & 1))
-#define dir_size(_e) ((uint32_t)(((_e)->w[1]) >> 10))
-#define dir_approx_size(_e) ((dir_size(_e) + 1) * DIR_BLOCK_SIZE(dir_big(_e)))
-#define dir_head(_e) dir_bit(_e, 2, 13)
-#define DIR_MASK_TAG(_t) ((_t) & ((1 << DIR_TAG_WIDTH) - 1))
-#define dir_tag(_e) ((uint32_t)((_e)->w[2] & ((1 << DIR_TAG_WIDTH) - 1)))
-#define dir_offset(_e) \
- ((int64_t)(((uint64_t)(_e)->w[0]) | (((uint64_t)((_e)->w[1] & 0xFF)) << 16) | (((uint64_t)(_e)->w[4]) << 24)))
-#define dir_set_offset(_e, _o) \
- do { \
- (_e)->w[0] = (uint16_t)_o; \
- (_e)->w[1] = (uint16_t)((((_o) >> 16) & 0xFF) | ((_e)->w[1] & 0xFF00)); \
- (_e)->w[4] = (uint16_t)((_o) >> 24); \
- } while (0)
-//#define dir_segment(_s, _d) vol_dir_segment(_d, _s)
-#define dir_in_seg(_s, _i) ((CacheDirEntry *)(((char *)(_s)) + (SIZEOF_DIR * (_i))))
-#define dir_next(_e) (_e)->w[3]
-#define dir_phase(_e) dir_bit(_e, 2, 12)
-#define DIR_BLOCK_SHIFT(_i) (3 * (_i))
-#define DIR_BLOCK_SIZE(_i) (CACHE_BLOCK_SIZE << DIR_BLOCK_SHIFT(_i))
-#define dir_set_prev(_e, _o) (_e)->w[2] = (uint16_t)(_o)
-#define dir_set_next(_e, _o) (_e)->w[3] = (uint16_t)(_o)
-
bool
-dir_compare_tag(const CacheDirEntry *e, const INK_MD5 *key)
+dir_compare_tag(const CacheDirEntry *e, const CryptoHash *key)
{
return (dir_tag(e) == DIR_MASK_TAG(key->slice32(2)));
}
-TS_INLINE CacheDirEntry *
-Stripe::dir_segment(int s)
-{
- return vol_dir_segment(s);
-}
-
-TS_INLINE CacheDirEntry *
-Stripe::vol_dir_segment(int s)
-{
- return (CacheDirEntry *)(((char *)this->dir) + (s * this->_buckets) * DIR_DEPTH * SIZEOF_DIR);
-}
-
-TS_INLINE CacheDirEntry *
-dir_bucket(int64_t b, CacheDirEntry *seg)
-{
- return dir_in_seg(seg, b * DIR_DEPTH);
-}
-
-TS_INLINE CacheDirEntry *
-dir_from_offset(int64_t i, CacheDirEntry *seg)
-{
-#if DIR_DEPTH < 5
- if (!i) {
- return nullptr;
- }
- return dir_in_seg(seg, i);
-#else
- i = i + ((i - 1) / (DIR_DEPTH - 1));
- return dir_in_seg(seg, i);
-#endif
-}
-
TS_INLINE int
vol_in_phase_valid(Stripe *d, CacheDirEntry *e)
{
@@ -490,43 +422,16 @@ vol_out_of_phase_valid(Stripe *d, CacheDirEntry *e)
return (dir_offset(e) - 1 >= ((d->_meta[0][0].agg_pos - d->_start) / CACHE_BLOCK_SIZE));
}
-TS_INLINE CacheDirEntry *
-next_dir(CacheDirEntry *d, CacheDirEntry *seg)
-{
- int i = dir_next(d);
- return dir_from_offset(i, seg);
-}
-#define dir_offset(_e) \
- ((int64_t)(((uint64_t)(_e)->w[0]) | (((uint64_t)((_e)->w[1] & 0xFF)) << 16) | (((uint64_t)(_e)->w[4]) << 24)))
-
-TS_INLINE CacheDirEntry *
-dir_bucket_row(CacheDirEntry *b, int64_t i)
-{
- return dir_in_seg(b, i);
-}
-
-TS_INLINE int64_t
-dir_to_offset(const CacheDirEntry *d, const CacheDirEntry *seg)
-{
-#if DIR_DEPTH < 5
- return (((char *)d) - ((char *)seg)) / SIZEOF_DIR;
-#else
- int64_t i = (int64_t)((((char *)d) - ((char *)seg)) / SIZEOF_DIR);
- i = i - (i / DIR_DEPTH);
- return i;
-#endif
-}
-
bool
Stripe::dir_valid(CacheDirEntry *_e)
{
return (this->_meta[0][0].phase == dir_phase(_e) ? vol_in_phase_valid(this, _e) : vol_out_of_phase_valid(this, _e));
}
-int64_t
+Bytes
Stripe::stripe_offset(CacheDirEntry *e)
{
- return this->_content + (int64_t)(dir_offset(e) * CACHE_BLOCK_SIZE) - CACHE_BLOCK_SIZE;
+ return this->_content + Bytes((dir_offset(e) * CACHE_BLOCK_SIZE) - CACHE_BLOCK_SIZE);
}
int
@@ -538,15 +443,16 @@ Stripe::dir_probe(CryptoHash *key, CacheDirEntry *result, CacheDirEntry **last_c
CacheDirEntry *seg = this->dir_segment(segment);
CacheDirEntry *e = nullptr;
e = dir_bucket(bucket, seg);
- char *stripe_buff2 = (char *)malloc(dir_approx_size(e));
+ char *stripe_buff2 = nullptr;
Doc *doc = nullptr;
// TODO: collision craft is pending.. look at the main ATS code. Assuming no collision for now
if (dir_offset(e)) {
do {
if (dir_compare_tag(e, key)) {
if (dir_valid(e)) {
+ stripe_buff2 = (char *)ats_memalign(ats_pagesize(), dir_approx_size(e));
std::cout << "dir_probe hit: found seg: " << segment << " bucket: " << bucket << " offset: " << dir_offset(e)
- << std::endl;
+ << "size: " << dir_approx_size(e) << std::endl;
break;
} else {
// let's skip deleting for now
@@ -557,17 +463,16 @@ Stripe::dir_probe(CryptoHash *key, CacheDirEntry *result, CacheDirEntry **last_c
e = next_dir(e, seg);
} while (e);
- int fd = _span->_fd;
- int64_t offset = stripe_offset(e);
- int64_t size = dir_approx_size(e);
- ssize_t n = pread(fd, stripe_buff2, size, offset);
- if (n < size) {
- std::cout << "Failed to read content from the Stripe" << std::endl;
- }
+
+ int fd = _span->_fd;
+ Bytes offset = stripe_offset(e);
+ int64_t size = dir_approx_size(e);
+ ssize_t n = pread(fd, stripe_buff2, size, offset);
+ if (n < size)
+ std::cout << "Failed to read content from the Stripe:" << strerror(n) << std::endl;
doc = reinterpret_cast<Doc *>(stripe_buff2);
std::string hdr(doc->hdr(), doc->hlen);
- std::cout << "HEADER\n" << hdr << std::endl;
std::string data_(doc->data(), doc->data_len());
std::cout << "DATA\n" << data_ << std::endl;
@@ -581,7 +486,7 @@ Stripe::dir_probe(CryptoHash *key, CacheDirEntry *result, CacheDirEntry **last_c
CacheDirEntry *
Stripe::dir_delete_entry(CacheDirEntry *e, CacheDirEntry *p, int s)
{
- CacheDirEntry *seg = dir_segment(s);
+ CacheDirEntry *seg = this->dir_segment(s);
int no = dir_next(e);
this->_meta[0][0].dirty = 1;
if (p) {
diff --git a/cmd/traffic_cache_tool/CacheDefs.h b/cmd/traffic_cache_tool/CacheDefs.h
index e20a89e..2284de9 100644
--- a/cmd/traffic_cache_tool/CacheDefs.h
+++ b/cmd/traffic_cache_tool/CacheDefs.h
@@ -25,7 +25,6 @@
#include <netinet/in.h>
#include <iostream>
#include <ts/I_Version.h>
-#include <ts/INK_MD5.h>
#include <ts/Scalar.h>
#include <ts/Regex.h>
#include <tsconfig/Errata.h>
@@ -53,6 +52,9 @@ using ts::round_up;
namespace ts
{
+/* INK_ALIGN() is only to be used to align on a power of 2 boundary */
+#define INK_ALIGN(size, boundary) (((size) + ((boundary)-1)) & ~((boundary)-1))
+#define ROUND_TO_STORE_BLOCK(_x) INK_ALIGN((_x), 8192) /* round _x up to a multiple of 8192 bytes */
#define dir_clear(_e) \
do { \
(_e)->w[0] = 0; \
@@ -164,162 +166,19 @@ public:
uint16_t freelist[1];
};
-// struct HTTPCacheAlt
-struct HTTPCacheAlt {
- HTTPCacheAlt();
- void copy(HTTPCacheAlt *to_copy);
- void copy_frag_offsets_from(HTTPCacheAlt *src);
- void destroy();
-
- uint32_t m_magic;
-
- // Writeable is set to true is we reside
- // in a buffer owned by this structure.
- // INVARIANT: if own the buffer this HttpCacheAlt
- // we also own the buffers for the request &
- // response headers
- int32_t m_writeable;
- int32_t m_unmarshal_len;
-
- int32_t m_id;
- int32_t m_rid;
-
- int32_t m_object_key[4];
- int32_t m_object_size[2];
-
- // HTTPHdr m_request_hdr;
- // HTTPHdr m_response_hdr;
-
- time_t m_request_sent_time;
- time_t m_response_received_time;
-
- /// # of fragment offsets in this alternate.
- /// @note This is one less than the number of fragments.
- int m_frag_offset_count;
- /// Type of offset for a fragment.
- typedef uint64_t FragOffset;
- /// Table of fragment offsets.
- /// @note The offsets are forward looking so that frag[0] is the
- /// first byte past the end of fragment 0 which is also the first
- /// byte of fragment 1. For this reason there is no fragment offset
- /// for the last fragment.
- FragOffset *m_frag_offsets;
- /// # of fragment offsets built in to object.
- static int constexpr N_INTEGRAL_FRAG_OFFSETS = 4;
- /// Integral fragment offset table.
- FragOffset m_integral_frag_offsets[N_INTEGRAL_FRAG_OFFSETS];
-
- // With clustering, our alt may be in cluster
- // incoming channel buffer, when we are
- // destroyed we decrement the refcount
- // on that buffer so that it gets destroyed
- // We don't want to use a ref count ptr (Ptr<>)
- // since our ownership model requires explicit
- // destroys and ref count pointers defeat this
- // RefCountObj *m_ext_buffer;
-};
-
-//
-// HTTPCacheAlt::HTTPCacheAlt()
-// : m_magic(CACHE_ALT_MAGIC_ALIVE),
-// m_writeable(1),
-// m_unmarshal_len(-1),
-// m_id(-1),
-// m_rid(-1),
-// m_request_hdr(),
-// m_response_hdr(),
-// m_request_sent_time(0),
-// m_response_received_time(0),
-// m_frag_offset_count(0),
-// m_frag_offsets(nullptr),
-// m_ext_buffer(nullptr)
-//{
-// m_object_key[0] = 0;
-// m_object_key[1] = 0;
-// m_object_key[2] = 0;
-// m_object_key[3] = 0;
-// m_object_size[0] = 0;
-// m_object_size[1] = 0;
-//}
-//
-// void
-// HTTPCacheAlt::destroy()
-//{
-// ink_assert(m_magic == CACHE_ALT_MAGIC_ALIVE);
-// ink_assert(m_writeable);
-// m_magic = CACHE_ALT_MAGIC_DEAD;
-// m_writeable = 0;
-// m_request_hdr.destroy();
-// m_response_hdr.destroy();
-// m_frag_offset_count = 0;
-// if (m_frag_offsets && m_frag_offsets != m_integral_frag_offsets) {
-// ats_free(m_frag_offsets);
-// m_frag_offsets = nullptr;
-// }
-// httpCacheAltAllocator.free(this);
-//}
-//
-// void
-// HTTPCacheAlt::copy(HTTPCacheAlt *to_copy)
-//{
-// m_magic = to_copy->m_magic;
-// // m_writeable = to_copy->m_writeable;
-// m_unmarshal_len = to_copy->m_unmarshal_len;
-// m_id = to_copy->m_id;
-// m_rid = to_copy->m_rid;
-// m_object_key[0] = to_copy->m_object_key[0];
-// m_object_key[1] = to_copy->m_object_key[1];
-// m_object_key[2] = to_copy->m_object_key[2];
-// m_object_key[3] = to_copy->m_object_key[3];
-// m_object_size[0] = to_copy->m_object_size[0];
-// m_object_size[1] = to_copy->m_object_size[1];
-//
-// if (to_copy->m_request_hdr.valid()) {
-// m_request_hdr.copy(&to_copy->m_request_hdr);
-// }
-//
-// if (to_copy->m_response_hdr.valid()) {
-// m_response_hdr.copy(&to_copy->m_response_hdr);
-// }
-//
-// m_request_sent_time = to_copy->m_request_sent_time;
-// m_response_received_time = to_copy->m_response_received_time;
-// this->copy_frag_offsets_from(to_copy);
-//}
-//
-// void
-// HTTPCacheAlt::copy_frag_offsets_from(HTTPCacheAlt *src)
-//{
-// m_frag_offset_count = src->m_frag_offset_count;
-// if (m_frag_offset_count > 0) {
-// if (m_frag_offset_count > N_INTEGRAL_FRAG_OFFSETS) {
-// /* Mixed feelings about this - technically we don't need it to be a
-// power of two when copied because currently that means it is frozen.
-// But that could change later and it would be a nasty bug to find.
-// So we'll do it for now. The relative overhead is tiny.
-// */
-// int bcount = HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS * 2;
-// while (bcount < m_frag_offset_count) {
-// bcount *= 2;
-// }
-// m_frag_offsets = static_cast<FragOffset *>(ats_malloc(sizeof(FragOffset) * bcount));
-// } else {
-// m_frag_offsets = m_integral_frag_offsets;
-// }
-// memcpy(m_frag_offsets, src->m_frag_offsets, sizeof(FragOffset) * m_frag_offset_count);
-// }
-//}
-
-/*
- @internal struct Doc
- */
-
struct Doc {
- uint32_t magic; // DOC_MAGIC
- uint32_t len; // length of this fragment (including hlen & sizeof(Doc), unrounded)
- uint64_t total_len; // total length of document
- INK_MD5 first_key; ///< first key in object.
- INK_MD5 key; ///< Key for this doc.
+ uint32_t magic; // DOC_MAGIC
+ uint32_t len; // length of this fragment (including hlen & sizeof(Doc), unrounded)
+ uint64_t total_len; // total length of document
+#if TS_ENABLE_FIPS == 1
+ // For FIPS CryptoHash is 256 bits vs. 128, and the 'first_key' must be checked first, so
+ // ensure that the new 'first_key' overlaps the old 'first_key' and that the rest of the data layout
+  // is the same by putting 'key' at the end.
+ CryptoHash first_key; ///< first key in object.
+#else
+ CryptoHash first_key; ///< first key in object.
+ CryptoHash key; ///< Key for this doc.
+#endif
uint32_t hlen; ///< Length of this header.
uint32_t doc_type : 8; ///< Doc type - indicates the format of this structure and its content.
uint32_t v_major : 8; ///< Major version number.
@@ -329,7 +188,9 @@ struct Doc {
uint32_t write_serial;
uint32_t pinned; // pinned until
uint32_t checksum;
-
+#if TS_ENABLE_FIPS == 1
+ CryptoHash key; ///< Key for this doc.
+#endif
uint32_t data_len();
uint32_t prefix_len();
int single_fragment();
@@ -490,6 +351,76 @@ constexpr int CACHE_BLOCK_SIZE = (1 << CACHE_BLOCK_SHIFT); // 512, smal
namespace ct
{
+// Read accessors for the packed words (w[0..4]) of a CacheDirEntry.
+//   dir_big         - 2-bit field taken from bits 8-9 of w[1]; selects the block size class.
+//   dir_bit         - a single bit _b of word _w.
+//   dir_size        - the bits of w[1] above bit 9.
+//   dir_approx_size - (dir_size + 1) blocks of the size selected by dir_big.
+//   dir_head        - bit 13 of w[2].
+//   DIR_MASK_TAG    - keeps the low DIR_TAG_WIDTH bits of _t.
+//   dir_tag         - low DIR_TAG_WIDTH bits of w[2].
+//   dir_offset      - w[0] | (low 8 bits of w[1]) << 16 | w[4] << 24, as an int64_t.
+#define dir_big(_e) ((uint32_t)((((_e)->w[1]) >> 8) & 0x3))
+#define dir_bit(_e, _w, _b) ((uint32_t)(((_e)->w[_w] >> (_b)) & 1))
+#define dir_size(_e) ((uint32_t)(((_e)->w[1]) >> 10))
+#define dir_approx_size(_e) ((dir_size(_e) + 1) * DIR_BLOCK_SIZE(dir_big(_e)))
+#define dir_head(_e) dir_bit(_e, 2, 13)
+#define DIR_MASK_TAG(_t) ((_t) & ((1 << DIR_TAG_WIDTH) - 1))
+#define dir_tag(_e) ((uint32_t)((_e)->w[2] & ((1 << DIR_TAG_WIDTH) - 1)))
+#define dir_offset(_e) \
+  ((int64_t)(((uint64_t)(_e)->w[0]) | (((uint64_t)((_e)->w[1] & 0xFF)) << 16) | (((uint64_t)(_e)->w[4]) << 24)))
+
+// Store offset _o into directory entry _e: bits 0-15 go to w[0], bits 16-23 to the low byte
+// of w[1] (its high byte is preserved) and the bits from 24 up to w[4].
+// _o is parenthesised everywhere so that an expression argument (e.g. `x >> 4`) is evaluated
+// as a whole before the cast is applied.
+#define dir_set_offset(_e, _o)                                              \
+  do {                                                                      \
+    (_e)->w[0] = (uint16_t)(_o);                                            \
+    (_e)->w[1] = (uint16_t)((((_o) >> 16) & 0xFF) | ((_e)->w[1] & 0xFF00)); \
+    (_e)->w[4] = (uint16_t)((_o) >> 24);                                    \
+  } while (0)
+
+// dir_next        - w[3] of the entry (usable as an lvalue); see next_dir() for how it is followed.
+// dir_phase       - bit 12 of w[2].
+// DIR_BLOCK_SHIFT - 3 * _i, i.e. each size class is 8x the previous one.
+// DIR_BLOCK_SIZE  - CACHE_BLOCK_SIZE shifted left by DIR_BLOCK_SHIFT(_i).
+// dir_set_prev    - overwrites w[2] with _o.
+// dir_set_next    - overwrites w[3] with _o.
+#define dir_next(_e) (_e)->w[3]
+#define dir_phase(_e) dir_bit(_e, 2, 12)
+#define DIR_BLOCK_SHIFT(_i) (3 * (_i))
+#define DIR_BLOCK_SIZE(_i) (CACHE_BLOCK_SIZE << DIR_BLOCK_SHIFT(_i))
+#define dir_set_prev(_e, _o) (_e)->w[2] = (uint16_t)(_o)
+#define dir_set_next(_e, _o) (_e)->w[3] = (uint16_t)(_o)
+
+// Address of the _i-th SIZEOF_DIR-sized slot counted from _s.
+#define dir_in_seg(_s, _i) ((CacheDirEntry *)(((char *)(_s)) + (SIZEOF_DIR * (_i))))
+
+/** Convert a chain index stored in a directory entry into an entry pointer.
+
+    @param i   Index value as stored in an entry's "next" word.
+    @param seg First entry of the segment the index is relative to.
+    @return The addressed entry. When DIR_DEPTH < 5 an index of 0 yields @c nullptr,
+            which is what terminates chain walks such as next_dir().
+ */
+TS_INLINE CacheDirEntry *
+dir_from_offset(int64_t i, CacheDirEntry *seg)
+{
+#if DIR_DEPTH < 5
+  if (!i) {
+    return nullptr;
+  }
+  return dir_in_seg(seg, i);
+#else
+  // Stored indexes skip one slot per bucket; re-expand before addressing.
+  i = i + ((i - 1) / (DIR_DEPTH - 1));
+  return dir_in_seg(seg, i);
+#endif
+}
+
+/// First entry of bucket @a b inside segment @a seg (each bucket spans DIR_DEPTH slots).
+TS_INLINE CacheDirEntry *
+dir_bucket(int64_t b, CacheDirEntry *seg)
+{
+  const int64_t first_slot = b * DIR_DEPTH;
+  return dir_in_seg(seg, first_slot);
+}
+
+/// Follow the "next" word of entry @a d within segment @a seg; the result is whatever
+/// dir_from_offset() returns for that index.
+TS_INLINE CacheDirEntry *
+next_dir(CacheDirEntry *d, CacheDirEntry *seg)
+{
+  const int next_index = dir_next(d);
+
+  return dir_from_offset(next_index, seg);
+}
+
+/// Entry @a i slots past @a b (thin wrapper around dir_in_seg).
+TS_INLINE CacheDirEntry *
+dir_bucket_row(CacheDirEntry *b, int64_t i)
+{
+  return dir_in_seg(b, i);
+}
+
+/** Inverse of dir_from_offset(): turn an entry pointer into its chain index.
+
+    @param d   Entry inside the segment.
+    @param seg First entry of that segment.
+    @return Slot distance from @a seg, compacted by one per DIR_DEPTH slots when DIR_DEPTH >= 5.
+ */
+TS_INLINE int64_t
+dir_to_offset(const CacheDirEntry *d, const CacheDirEntry *seg)
+{
+  const int64_t slot = (int64_t)((((const char *)d) - ((const char *)seg)) / SIZEOF_DIR);
+#if DIR_DEPTH < 5
+  return slot;
+#else
+  return slot - (slot / DIR_DEPTH);
+#endif
+}
+
struct Stripe;
struct Span {
Span(FilePath const &path) : _path(path) {}
@@ -601,11 +532,24 @@ struct Stripe {
// This is because the freelist is not being copied to _metap[2][2] correctly.
// need to do something about it .. hmmm :-?
int dir_freelist_length(int s);
- TS_INLINE CacheDirEntry *dir_segment(int s);
- TS_INLINE CacheDirEntry *vol_dir_segment(int s);
- int64_t stripe_offset(CacheDirEntry *e); // offset of e w.r.t the stripe
+  /// Start of directory segment @a s: segments are laid out back to back from @c dir,
+  /// each holding @c _buckets buckets of DIR_DEPTH entries.
+  TS_INLINE CacheDirEntry *
+  vol_dir_segment(int s)
+  {
+    char *dir_base = (char *)this->dir;
+    return (CacheDirEntry *)(dir_base + (s * this->_buckets) * DIR_DEPTH * SIZEOF_DIR);
+  }
+  /// Alias for vol_dir_segment(): first entry of directory segment @a s.
+  TS_INLINE CacheDirEntry *
+  dir_segment(int s)
+  {
+    return vol_dir_segment(s);
+  }
+
+ Bytes stripe_offset(CacheDirEntry *e); // offset w.r.t the stripe content
size_t vol_dirlen();
- TS_INLINE int vol_headerlen();
+  /// Size of the stripe header: StripeMeta plus one uint16_t per additional segment
+  /// (_segments - 1), rounded up with ROUND_TO_STORE_BLOCK.
+  TS_INLINE int
+  vol_headerlen()
+  {
+    const auto unrounded = sizeof(StripeMeta) + sizeof(uint16_t) * (this->_segments - 1);
+    return ROUND_TO_STORE_BLOCK(unrounded);
+  }
void vol_init_data_internal();
void vol_init_data();
void dir_init_segment(int s);
diff --git a/cmd/traffic_cache_tool/CacheScan.cc b/cmd/traffic_cache_tool/CacheScan.cc
new file mode 100644
index 0000000..9778309
--- /dev/null
+++ b/cmd/traffic_cache_tool/CacheScan.cc
@@ -0,0 +1,339 @@
+/** @file
+
+  Cache scan: walks a stripe's directory and prints the URL of every cached alternate.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include "CacheScan.h"
+
+#include <cerrno>
+#include <cstring>
+
+#include "../../proxy/hdrs/HTTP.h"
+#include "../../proxy/hdrs/HdrHeap.h"
+#include "../../proxy/hdrs/MIME.h"
+#include "../../proxy/hdrs/URL.h"
+
+// using namespace ct;
+
+// Bytes skipped for the HTTPCacheAlt structure itself when unmarshalling an alternate:
+// sizeof(HTTPCacheAlt) passed through ROUND with HDR_PTR_SIZE (presumably rounding up to
+// pointer alignment - confirm against the ROUND macro).
+const int HTTP_ALT_MARSHAL_SIZE = ROUND(sizeof(HTTPCacheAlt), HDR_PTR_SIZE);
+
+namespace ct
+{
+/** Walk every directory bucket of the stripe and list the alternates of each document found.
+
+    For every bucket whose head entry has a non-zero offset, each entry of the chain is read
+    from the span's file descriptor into a page-aligned buffer and the header area of the
+    resulting Doc is handed to get_alternates().
+
+    Entries that cannot be read completely are reported on stdout and skipped instead of
+    being parsed from a stale or partially filled buffer.
+
+    @return An Errata; nothing is pushed onto it by this method.
+ */
+Errata
+CacheScan::Scan()
+{
+  Errata zret;
+  int64_t buff_size  = 1048576; // 1M initial guess, grown when an entry needs more
+  char *stripe_buff2 = (char *)ats_memalign(ats_pagesize(), buff_size);
+  int fd             = this->stripe->_span->_fd;
+
+  for (int s = 0; s < this->stripe->_segments; s++) {
+    CacheDirEntry *seg = this->stripe->dir_segment(s);
+    for (int b = 0; b < this->stripe->_buckets; b++) {
+      CacheDirEntry *e = dir_bucket(b, seg);
+      if (!dir_offset(e)) {
+        continue; // empty bucket
+      }
+      for (; e; e = next_dir(e, seg)) {
+        int64_t size = dir_approx_size(e);
+        if (size > buff_size) {
+          // Remember the new capacity so later entries are compared against the real buffer size.
+          ats_free(stripe_buff2);
+          stripe_buff2 = (char *)ats_memalign(ats_pagesize(), size);
+          buff_size    = size;
+        }
+        int64_t offset = this->stripe->stripe_offset(e);
+        ssize_t n      = pread(fd, stripe_buff2, size, offset);
+        if (n < size) {
+          // pread() reports failures through errno; its return value is a byte count, not an error code.
+          std::cout << "Failed to read content from the Stripe. " << (n < 0 ? strerror(errno) : "short read") << std::endl;
+          continue;
+        }
+        Doc *doc        = reinterpret_cast<Doc *>(stripe_buff2);
+        const char *hdr = doc->hdr();
+        // hlen comes straight from disk: never let the alternate parser run past what was read.
+        if (hdr < stripe_buff2 || (hdr - stripe_buff2) + (int64_t)doc->hlen > (int64_t)n) {
+          continue;
+        }
+        get_alternates(hdr, doc->hlen);
+      }
+    }
+  }
+  ats_free(stripe_buff2);
+
+  return zret;
+}
+
+/** Convert the marshalled offsets inside an HTTP header object back into pointers.
+
+    A request header has its method string and URL object fixed up, a response header its
+    reason string; any other polarity trips a release assert. The MIME header pointer is
+    fixed up in both cases.
+
+    @param obj    Header object to patch in place.
+    @param offset Value added to each stored offset (the address of the owning heap).
+    @return An empty Errata.
+ */
+Errata
+CacheScan::unmarshal(HTTPHdrImpl *obj, intptr_t offset)
+{
+  Errata result;
+
+  switch (obj->m_polarity) {
+  case HTTP_TYPE_REQUEST: {
+    HDR_UNMARSHAL_STR(obj->u.req.m_ptr_method, offset);
+    HDR_UNMARSHAL_PTR(obj->u.req.m_url_impl, URLImpl, offset);
+    break;
+  }
+  case HTTP_TYPE_RESPONSE: {
+    HDR_UNMARSHAL_STR(obj->u.resp.m_ptr_reason, offset);
+    break;
+  }
+  default:
+    ink_release_assert(!"unknown m_polarity");
+  }
+
+  HDR_UNMARSHAL_PTR(obj->m_fields_impl, MIMEHdrImpl, offset);
+  return result;
+}
+
+/** Convert the marshalled offsets of a MIME header and of its embedded first field block.
+
+    Live field slots get their name, value and (if set) duplicate-chain pointer fixed up;
+    every other slot below m_freetop is marked empty.
+
+    @param obj    MIME header object to patch in place.
+    @param offset Value added to each stored offset (the address of the owning heap).
+    @return An empty Errata.
+ */
+Errata
+CacheScan::unmarshal(MIMEHdrImpl *obj, intptr_t offset)
+{
+  Errata result;
+  HDR_UNMARSHAL_PTR(obj->m_fblock_list_tail, MIMEFieldBlockImpl, offset);
+
+  auto &first_block = obj->m_first_fblock;
+  HDR_UNMARSHAL_PTR(first_block.m_next, MIMEFieldBlockImpl, offset);
+
+  for (uint32_t slot_no = 0; slot_no < first_block.m_freetop; ++slot_no) {
+    auto &slot = first_block.m_field_slots[slot_no];
+
+    if (!slot.is_live()) {
+      // Clear out other types of slots
+      slot.m_readiness = MIME_FIELD_SLOT_READINESS_EMPTY;
+      continue;
+    }
+
+    HDR_UNMARSHAL_STR(slot.m_ptr_name, offset);
+    HDR_UNMARSHAL_STR(slot.m_ptr_value, offset);
+    if (slot.m_next_dup) {
+      HDR_UNMARSHAL_PTR(slot.m_next_dup, MIMEField, offset);
+    }
+  }
+  return result;
+}
+
+/** Convert the marshalled string offsets of a URL object back into pointers.
+
+    Each URL component pointer is passed through HDR_UNMARSHAL_STR with @a offset.
+
+    @param obj    URL object to patch in place.
+    @param offset Value applied to each stored offset (the address of the owning heap).
+    @return An empty Errata.
+ */
+Errata
+CacheScan::unmarshal(URLImpl *obj, intptr_t offset)
+{
+  Errata zret;
+  HDR_UNMARSHAL_STR(obj->m_ptr_scheme, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_user, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_password, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_host, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_port, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_path, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_params, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_query, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_fragment, offset);
+  HDR_UNMARSHAL_STR(obj->m_ptr_printed_string, offset);
+  return zret;
+}
+
+/** Unmarshal a header heap in place and locate the first object of a given type.
+
+    The heap's internal offsets are turned into pointers, then every object between
+    m_data_start and m_free_start is visited and its own offsets are fixed up via the
+    per-type unmarshal() overloads. The heap is marked alive only when the whole walk
+    succeeds.
+
+    @param hh         Marshalled heap (modified in place).
+    @param buf_length Bytes available starting at @a hh.
+    @param obj_type   Object type to look for.
+    @param found_obj  [out] First object of @a obj_type, or @c nullptr if none was reached.
+    @param block_ref  Optional ref-counted block that owns the strings of the heap.
+    @return An Errata; it carries a warning when an object of unhandled type or of zero
+            length stops the walk. Bad magic or a truncated heap return an empty Errata.
+ */
+Errata
+CacheScan::unmarshal(HdrHeap *hh, int buf_length, int obj_type, HdrHeapObjImpl **found_obj, RefCountObj *block_ref)
+{
+  Errata zret;
+  *found_obj = nullptr;
+
+  // Check out this heap and make sure it is OK
+  if (hh->m_magic != HDR_BUF_MAGIC_MARSHALED) {
+    ink_assert(!"HdrHeap::unmarshal bad magic");
+    return zret;
+  }
+
+  int unmarshal_size = hh->unmarshal_size();
+  if (unmarshal_size > buf_length) {
+    ink_assert(!"HdrHeap::unmarshal truncated header");
+    return zret;
+  }
+
+  hh->m_free_start = nullptr;
+
+  ink_release_assert(hh->m_writeable == false);
+  ink_release_assert(hh->m_free_size == 0);
+  ink_release_assert(hh->m_ronly_heap[0].m_heap_start != nullptr);
+
+  ink_assert(hh->m_free_start == nullptr);
+
+  // Convert Heap offsets to pointers
+  hh->m_data_start                 = ((char *)hh) + (intptr_t)hh->m_data_start;
+  hh->m_free_start                 = ((char *)hh) + hh->m_size;
+  hh->m_ronly_heap[0].m_heap_start = ((char *)hh) + (intptr_t)hh->m_ronly_heap[0].m_heap_start;
+
+  // Crazy Invariant - If we are sitting in a ref counted block,
+  // the HdrHeap lifetime is externally determined.  Whoever
+  // unmarshalls us should keep the block around as long as
+  // they want to use the header.  However, the strings can
+  // live beyond the heap life time because they are copied
+  // by reference into other header heaps, therefore we need
+  // to set the refcount ptr for the strings.  We don't
+  // actually increase the refcount here since for the header
+  // the lifetime is explicit but copies will increase
+  // the refcount
+  if (block_ref) {
+    hh->m_ronly_heap[0].m_ref_count_ptr.swizzle(block_ref);
+  }
+
+  // Loop over objects and swizzle their pointers to
+  // live offsets
+  char *obj_data  = hh->m_data_start;
+  intptr_t offset = (intptr_t)hh;
+
+  while (obj_data < hh->m_free_start) {
+    HdrHeapObjImpl *obj = (HdrHeapObjImpl *)obj_data;
+    ink_assert(obj_is_aligned(obj));
+
+    // A zero-length object would never advance obj_data; treat it as corruption rather
+    // than spinning forever on damaged cache content.
+    if (obj->m_length == 0) {
+      zret.push(0, 0, "WARNING: Unmarshal failed due to zero length object of type ", (int)obj->m_type, " after ",
+                (int)(obj_data - (char *)hh), " bytes");
+      return zret;
+    }
+
+    if (obj->m_type == (unsigned)obj_type && *found_obj == nullptr) {
+      *found_obj = obj;
+    }
+    // TODO : fix this switch
+    switch (obj->m_type) {
+    case HDR_HEAP_OBJ_HTTP_HEADER:
+      this->unmarshal((HTTPHdrImpl *)obj, offset);
+      break;
+    case HDR_HEAP_OBJ_URL:
+      this->unmarshal((URLImpl *)obj, offset);
+      break;
+    //    case HDR_HEAP_OBJ_FIELD_BLOCK:
+    //      this->unmarshal((MIMEFieldBlockImpl *)obj,offset);
+    //      break;
+    case HDR_HEAP_OBJ_MIME_HEADER:
+      this->unmarshal((MIMEHdrImpl *)obj, offset);
+      break;
+    //    case HDR_HEAP_OBJ_EMPTY:
+    //      // Nothing to do
+    //      break;
+    default:
+      // The reported position is relative to the heap being walked, not to this scanner object.
+      zret.push(0, 0, "WARNING: Unmarshal failed due to unknow obj type ", (int)obj->m_type, " after ",
+                (int)(obj_data - (char *)hh), " bytes");
+      // dump_heap(unmarshal_size);
+      return zret;
+    }
+
+    obj_data = obj_data + obj->m_length;
+  }
+
+  hh->m_magic = HDR_BUF_MAGIC_ALIVE;
+
+  // hh->unmarshal_size = ROUND(unmarshal_size, HDR_PTR_SIZE);
+  return zret;
+}
+
+/** Unmarshal one alternate (HTTPCacheAlt) stored at @a buf.
+
+    On success the alternate's magic is switched to "alive", its fragment offset table and
+    request / response header heaps are converted from offsets to pointers, and
+    m_unmarshal_len is set to the number of bytes the alternate occupies in @a buf.
+    On failure m_unmarshal_len is left untouched (negative for a marshalled alternate),
+    which callers can use to detect the failure.
+
+    @param buf       Start of the marshalled alternate (modified in place).
+    @param len       Bytes available starting at @a buf.
+    @param block_ref Optional ref-counted block owning @a buf; forwarded to the heap unmarshal.
+    @return An Errata; when a header heap could not be unmarshalled, the Errata produced by
+            that heap unmarshal is returned.
+ */
+Errata
+CacheScan::unmarshal(char *buf, int len, RefCountObj *block_ref)
+{
+  Errata zret;
+  HTTPCacheAlt *alt = (HTTPCacheAlt *)buf;
+  int orig_len      = len;
+
+  if (alt->m_magic == CACHE_ALT_MAGIC_ALIVE) {
+    // Already unmarshaled, must be a ram cache
+    //  it
+    ink_assert(alt->m_unmarshal_len > 0);
+    ink_assert(alt->m_unmarshal_len <= len);
+    return zret;
+  } else if (alt->m_magic != CACHE_ALT_MAGIC_MARSHALED) {
+    ink_assert(!"HTTPInfo::unmarshal bad magic");
+    return zret;
+  }
+
+  ink_assert(alt->m_unmarshal_len < 0);
+  alt->m_magic = CACHE_ALT_MAGIC_ALIVE;
+  ink_assert(alt->m_writeable == 0);
+  len -= HTTP_ALT_MARSHAL_SIZE;
+
+  if (alt->m_frag_offset_count > HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS) {
+    // stuff that didn't fit in the integral slots.
+    int extra       = sizeof(uint64_t) * alt->m_frag_offset_count - sizeof(alt->m_integral_frag_offsets);
+    char *extra_src = buf + reinterpret_cast<intptr_t>(alt->m_frag_offsets);
+    // Actual buffer size, which must be a power of two.
+    // Well, technically not, because we never modify an unmarshalled fragment
+    // offset table, but it would be a nasty bug should that be done in the
+    // future.
+    int bcount = HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS * 2;
+
+    while (bcount < alt->m_frag_offset_count) {
+      bcount *= 2;
+    }
+    // NOTE(review): nothing visible in this file frees this table again - confirm whether
+    // the scanner needs the fragment offsets at all.
+    alt->m_frag_offsets = static_cast<uint64_t *>(ats_malloc(bcount * sizeof(uint64_t)));
+    memcpy(alt->m_frag_offsets, alt->m_integral_frag_offsets, sizeof(alt->m_integral_frag_offsets));
+    memcpy(alt->m_frag_offsets + HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS, extra_src, extra);
+    len -= extra;
+  } else if (alt->m_frag_offset_count > 0) {
+    alt->m_frag_offsets = alt->m_integral_frag_offsets;
+  } else {
+    alt->m_frag_offsets = nullptr; // should really already be zero.
+  }
+
+  // request hdrs
+
+  HdrHeap *heap = (HdrHeap *)(alt->m_request_hdr.m_heap ? (buf + (intptr_t)alt->m_request_hdr.m_heap) : nullptr);
+  if (heap != nullptr) {
+    HTTPHdrImpl *req_impl = nullptr;
+    Errata heap_result    = this->unmarshal(heap, len, HDR_HEAP_OBJ_HTTP_HEADER, (HdrHeapObjImpl **)&req_impl, block_ref);
+    if (req_impl == nullptr) {
+      ink_assert(!"HTTPInfo::request unmarshal failed");
+      return heap_result;
+    }
+    // The heap unmarshal reports status, not a size: the bytes consumed are the heap's own
+    // marshalled size rounded to pointer alignment.
+    len -= ROUND(heap->unmarshal_size(), HDR_PTR_SIZE);
+    alt->m_request_hdr.m_heap              = heap;
+    alt->m_request_hdr.m_http              = req_impl;
+    alt->m_request_hdr.m_mime              = req_impl->m_fields_impl;
+    alt->m_request_hdr.m_url_cached.m_heap = heap;
+  }
+
+  // response hdrs
+
+  heap = (HdrHeap *)(alt->m_response_hdr.m_heap ? (buf + (intptr_t)alt->m_response_hdr.m_heap) : nullptr);
+  if (heap != nullptr) {
+    // The response heap needs its own unmarshal pass and its own header object; reusing the
+    // request's would point the response at the request header.
+    HTTPHdrImpl *resp_impl = nullptr;
+    Errata heap_result     = this->unmarshal(heap, len, HDR_HEAP_OBJ_HTTP_HEADER, (HdrHeapObjImpl **)&resp_impl, block_ref);
+    if (resp_impl == nullptr) {
+      ink_assert(!"HTTPInfo::response unmarshal failed");
+      return heap_result;
+    }
+    len -= ROUND(heap->unmarshal_size(), HDR_PTR_SIZE);
+
+    alt->m_response_hdr.m_heap = heap;
+    alt->m_response_hdr.m_http = resp_impl;
+    alt->m_response_hdr.m_mime = resp_impl->m_fields_impl;
+  }
+
+  alt->m_unmarshal_len = orig_len - len;
+
+  return zret;
+}
+
+/** Unmarshal the alternates packed in a document header area and print each request URL.
+
+    Alternates are processed back to back starting at @a buf until fewer than
+    sizeof(HTTPCacheAlt) bytes remain, an entry without the marshalled magic is met, or an
+    alternate fails to unmarshal.
+
+    @param buf    Start of the header area; must be 4-byte aligned. The memory is modified.
+    @param length Number of bytes in the header area.
+    @return The Errata of the alternate that failed to unmarshal, otherwise an empty Errata.
+ */
+Errata
+CacheScan::get_alternates(const char *buf, int length)
+{
+  Errata zret;
+  ink_assert(!(((intptr_t)buf) & 3)); // buf must be aligned
+
+  char *start            = (char *)buf;
+  RefCountObj *block_ref = nullptr;
+  // URL components are frequently absent (null pointer, zero length).
+  auto text = [](const char *ptr, int len) { return (ptr != nullptr && len > 0) ? std::string(ptr, len) : std::string(); };
+
+  while (length - (buf - start) > (int)sizeof(HTTPCacheAlt)) {
+    HTTPCacheAlt *a = (HTTPCacheAlt *)buf;
+
+    if (a->m_magic != CACHE_ALT_MAGIC_MARSHALED) {
+      break;
+    }
+
+    // Only the bytes from this alternate to the end of the header area are available to it.
+    int remaining     = length - (int)(buf - start);
+    Errata alt_result = this->unmarshal((char *)buf, remaining, block_ref);
+    if (a->m_unmarshal_len <= 0) {
+      // unmarshal() only sets a positive length on success; without it there is neither a
+      // usable header nor a way to find the next alternate.
+      zret = alt_result;
+      break;
+    }
+
+    if (a->m_request_hdr.m_heap != nullptr && a->m_request_hdr.m_http != nullptr) {
+      auto *url = a->m_request_hdr.m_http->u.req.m_url_impl;
+      if (url != nullptr) {
+        std::string str = "stripe: " + this->stripe->hashText + " : " + text(url->m_ptr_scheme, url->m_len_scheme) + "://" +
+                          text(url->m_ptr_host, url->m_len_host) + ":" + text(url->m_ptr_port, url->m_len_port) + "/" +
+                          text(url->m_ptr_path, url->m_len_path) + ";" + text(url->m_ptr_params, url->m_len_params) + "?" +
+                          text(url->m_ptr_query, url->m_len_query);
+        std::cout << str << std::endl;
+      }
+    }
+
+    buf += a->m_unmarshal_len;
+  }
+
+  return zret;
+}
+
+} // end namespace ct
\ No newline at end of file
diff --git a/cmd/traffic_cache_tool/CacheScan.h b/cmd/traffic_cache_tool/CacheScan.h
new file mode 100644
index 0000000..b0f2a00
--- /dev/null
+++ b/cmd/traffic_cache_tool/CacheScan.h
@@ -0,0 +1,56 @@
+/** @file
+
+  Declaration of CacheScan, which walks a stripe's directory and lists the URLs of cached alternates.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+/*
+ * File: CacheScan.h
+ * Author: persia
+ *
+ * Created on April 4, 2018, 10:06 AM
+ */
+
+#ifndef CACHESCAN_H
+#define CACHESCAN_H
+#include <thread>
+#include <unordered_map>
+#include "CacheDefs.h"
+
+#include "../../proxy/hdrs/HTTP.h"
+// using namespace ct;
+namespace ct
+{
+/** Lists the cached objects of one stripe.
+
+    Scan() reads every directory entry of the stripe, unmarshals the alternates stored in
+    each document and prints the request URL of each alternate to stdout. The unmarshal()
+    overloads convert marshalled offsets back into pointers, directly in the read buffer.
+ */
+class CacheScan
+{
+  Stripe *stripe = nullptr; ///< Stripe being scanned; not owned.
+
+public:
+  /// @param str Stripe to scan; its metadata and directory must already be loaded.
+  explicit CacheScan(Stripe *str) : stripe(str) {}
+  /// Walk the whole stripe directory and print the URL of every alternate found.
+  Errata Scan();
+  /// Unmarshal and print the alternates stored in the @a length bytes at @a buf.
+  Errata get_alternates(const char *buf, int length);
+  /// Unmarshal a single alternate (HTTPCacheAlt) located at @a buf.
+  Errata unmarshal(char *buf, int len, RefCountObj *block_ref);
+  /// Unmarshal a header heap and report the first object of type @a obj_type in @a found_obj.
+  Errata unmarshal(HdrHeap *hh, int buf_length, int obj_type, HdrHeapObjImpl **found_obj, RefCountObj *block_ref);
+  /// Fix up the pointers of an HTTP header object.
+  Errata unmarshal(HTTPHdrImpl *obj, intptr_t offset);
+  /// Fix up the pointers of a MIME header object and its first field block.
+  Errata unmarshal(MIMEHdrImpl *obj, intptr_t offset);
+  /// Fix up the string pointers of a URL object.
+  Errata unmarshal(URLImpl *obj, intptr_t offset);
+};
+} // namespace ct
+
+#endif /* CACHESCAN_H */
diff --git a/cmd/traffic_cache_tool/CacheTool.cc b/cmd/traffic_cache_tool/CacheTool.cc
index 09126ce..84e4e5f 100644
--- a/cmd/traffic_cache_tool/CacheTool.cc
+++ b/cmd/traffic_cache_tool/CacheTool.cc
@@ -42,10 +42,12 @@
#include <ts/ink_file.h>
#include <ts/BufferWriter.h>
#include <ts/CryptoHash.h>
+#include <thread>
#include "File.h"
#include "CacheDefs.h"
#include "Command.h"
+#include "CacheScan.h"
using ts::Bytes;
using ts::Megabytes;
@@ -1348,6 +1350,37 @@ Get_Response(FilePath const &input_file_path)
return zret;
}
+/// Thread body: for each stripe of @a span, load its metadata and directory, walk all
+/// buckets, then run a CacheScan over it.
+static void
+scan_span(Span *span)
+{
+  for (auto stripe : span->_stripes) {
+    stripe->loadMeta();
+    stripe->loadDir();
+    stripe->walk_all_buckets();
+
+    CacheScan scanner(stripe);
+    scanner.Scan();
+  }
+}
+
+/// "scan" command: load the spans named by SpanFile, dump them, then scan every span on
+/// its own thread and wait for all of them.
+/// @return The Errata produced by loading the spans.
+Errata
+Scan_Cache()
+{
+  Cache cache;
+  std::vector<std::thread> workers;
+  Errata zret = cache.loadSpan(SpanFile);
+
+  if (!zret) {
+    return zret;
+  }
+
+  cache.dumpSpans(Cache::SpanDumpDepth::SPAN);
+  for (auto span : cache._spans) {
+    // std::thread is move-only, so each worker is constructed in place.
+    workers.emplace_back(scan_span, span);
+  }
+  for (auto &worker : workers) {
+    worker.join();
+  }
+  return zret;
+}
+
int
main(int argc, char *argv[])
{
@@ -1412,6 +1445,8 @@ main(int argc, char *argv[])
[&](int, char *argv[]) { return Get_Response(input_url_file); });
Commands.add(std::string("init"), std::string(" Initializes uninitialized span"),
[&](int, char *argv[]) { return Init_disk(input_url_file); });
+ Commands.add(std::string("scan"), std::string(" Scans the whole cache and lists the urls of the cached contents"),
+ [&](int, char *argv[]) { return Scan_Cache(); });
Commands.setArgIndex(optind);
if (help) {
diff --git a/cmd/traffic_cache_tool/Makefile.am b/cmd/traffic_cache_tool/Makefile.am
index 02c57e1..c8c4c36 100644
--- a/cmd/traffic_cache_tool/Makefile.am
+++ b/cmd/traffic_cache_tool/Makefile.am
@@ -17,13 +17,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+
include $(top_srcdir)/build/tidy.mk
AM_CPPFLAGS = -I $(top_srcdir)/lib -D__STDC_FORMAT_MACROS
noinst_PROGRAMS = traffic_cache_tool
-traffic_cache_tool_SOURCES = CacheDefs.h CacheDefs.cc CacheTool.cc File.h File.cc Command.h Command.cc
+traffic_cache_tool_SOURCES = CacheDefs.h CacheDefs.cc CacheTool.cc File.h File.cc Command.h Command.cc CacheScan.h CacheScan.cc
traffic_cache_tool_LDADD = \
$(top_builddir)/lib/ts/.libs/ink_assert.o \
$(top_builddir)/lib/ts/.libs/ink_code.o \
@@ -37,7 +38,8 @@ traffic_cache_tool_LDADD = \
$(top_builddir)/lib/ts/.libs/Regex.o \
$(top_builddir)/lib/ts/.libs/CryptoHash.o \
$(top_builddir)/lib/ts/.libs/MMH.o \
- @OPENSSL_LIBS@ @LIBPCRE@
+ @OPENSSL_LIBS@ @LIBPCRE@ @LIBTCL@
+
all-am: Makefile $(PROGRAMS)
--
To stop receiving notification emails like this one, please contact
paziz@apache.org.