You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by bc...@apache.org on 2018/07/05 17:18:46 UTC
[trafficserver] 01/03: Search cache for urls matching the regex input
This is an automated email from the ASF dual-hosted git repository.
bcall pushed a commit to branch 8.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 0ba368a52a04f9bcf581a05df46b3013b50512c9
Author: Persia Aziz <pe...@yahoo-inc.com>
AuthorDate: Wed May 2 10:04:06 2018 -0500
Search cache for urls matching the regex input
(cherry picked from commit 9aaee09eafe6e386515e7ec219079a6c967c1819)
---
src/traffic_cache_tool/CacheDefs.h | 47 ++++++++++++++++++++++++++++++-------
src/traffic_cache_tool/CacheScan.cc | 33 +++++++++++++++++---------
src/traffic_cache_tool/CacheScan.h | 13 +++++++---
src/traffic_cache_tool/CacheTool.cc | 22 ++++++++++++-----
4 files changed, 86 insertions(+), 29 deletions(-)
diff --git a/src/traffic_cache_tool/CacheDefs.h b/src/traffic_cache_tool/CacheDefs.h
index 6944a27..2ec599c 100644
--- a/src/traffic_cache_tool/CacheDefs.h
+++ b/src/traffic_cache_tool/CacheDefs.h
@@ -275,14 +275,45 @@ public:
class DFA;
// this class matches url of the format : scheme://hostname:port/path;params?query
+
struct url_matcher {
- // R"(^https?\:\/\/^[a-z A-Z 0-9]\.[a-z A-Z 0-9 \.]+)"
+ url_matcher(ts::FilePath const &path) // file contains a list of regex
+ {
+ ts::BulkFile cfile(path);
+ if (cfile.load() == 0) {
+ ts::TextView fileContent = cfile.content();
+ const char **patterns;
+ std::vector<std::string> str_vec;
+ int count = 0;
+ while (fileContent) {
+ ts::TextView line = fileContent.take_prefix_at('\n');
+ std::string reg_str(line.data(), line.size());
+ str_vec.push_back(reg_str);
+ count++;
+ }
+ patterns = (const char **)ats_malloc(count * sizeof(char *));
+ int i = 0;
+ for (auto str : str_vec) {
+ patterns[i++] = ats_strdup(str.data());
+
+ std::cout << "regex input\n" << patterns[i - 1] << std::endl;
+ }
+ for (i = 0; i < count; i++) {
+ std::cout << "regex " << patterns[i] << std::endl;
+ }
+ if (regex.compile(patterns, count) != 0) {
+ std::cout << "Check your regular expression" << std::endl;
+ }
+
+ if (port.compile(R"([0-9]+$)") != 0) {
+ std::cout << "Check your regular expression" << std::endl;
+ return;
+ }
+ }
+ }
+
url_matcher()
{
- /*if (regex.compile(R"(^https?\:\/\/^[a-z A-Z 0-9][\. a-z A-Z 0-9 ]+(\:[0-9]\/)?.*))") != 0) {
- std::cout<<"Check your regular expression"<<std::endl;
- }*/
- // (\w+\:[\w\W]+\@)? (:[0-9]+)?(\/.*)
if (regex.compile(R"(^(https?\:\/\/)") != 0) {
std::cout << "Check your regular expression" << std::endl;
return;
@@ -294,14 +325,14 @@ struct url_matcher {
}
~url_matcher() {}
+
uint8_t
match(const char *hostname) const
{
if (regex.match(hostname) != -1) {
return 1;
}
- // if(url_with_user.match(hostname) != -1)
- // return 2;
+
return 0;
}
uint8_t
@@ -310,8 +341,6 @@ struct url_matcher {
if (port.match(hostname, length) != -1) {
return 1;
}
- // if(url_with_user.match(hostname) != -1)
- // return 2;
return 0;
}
diff --git a/src/traffic_cache_tool/CacheScan.cc b/src/traffic_cache_tool/CacheScan.cc
index 2cbd471..06be535 100644
--- a/src/traffic_cache_tool/CacheScan.cc
+++ b/src/traffic_cache_tool/CacheScan.cc
@@ -34,7 +34,7 @@ const int HTTP_ALT_MARSHAL_SIZE = ROUND(sizeof(HTTPCacheAlt), HDR_PTR_SIZE);
namespace ct
{
Errata
-CacheScan::Scan()
+CacheScan::Scan(bool search)
{
int64_t guessed_size = 1048576; // 1M
Errata zret;
@@ -63,7 +63,7 @@ CacheScan::Scan()
std::cout << "Failed to read content from the Stripe. " << strerror(errno) << std::endl;
} else {
Doc *doc = reinterpret_cast<Doc *>(stripe_buff2);
- get_alternates(doc->hdr(), doc->hlen);
+ get_alternates(doc->hdr(), doc->hlen, search);
}
dir_bitset[dir_to_offset(e, seg)] = true;
e = next_dir(e, seg);
@@ -125,12 +125,12 @@ CacheScan::unmarshal(MIMEFieldBlockImpl *mf, intptr_t offset)
{
Errata zret;
HDR_UNMARSHAL_PTR(mf->m_next, MIMEFieldBlockImpl, offset);
-
+ ts::MemSpan mf_mem((char *)mf, mf->m_length);
for (uint32_t index = 0; index < mf->m_freetop; index++) {
MIMEField *field = &(mf->m_field_slots[index]);
// check if out of bounds
- if (((char *)field - (char *)mf) > mf->m_length) {
+ if (!mf_mem.contains((char *)field)) {
zret.push(0, 0, "Out of bounds memory in the deserialized MIMEFieldBlockImpl");
return zret;
}
@@ -358,7 +358,7 @@ CacheScan::check_url(ts::MemSpan &mem, URLImpl *url)
}
Errata
-CacheScan::get_alternates(const char *buf, int length)
+CacheScan::get_alternates(const char *buf, int length, bool search)
{
Errata zret;
ink_assert(!(((intptr_t)buf) & 3)); // buf must be aligned
@@ -378,7 +378,7 @@ CacheScan::get_alternates(const char *buf, int length)
} else if (!a->m_request_hdr.m_http) {
std::cerr << "no http object found in the request header object" << std::endl;
return zret;
- } else if (((char *)a->m_request_hdr.m_http - buf) > length) {
+ } else if (!doc_mem.contains((char *)a->m_request_hdr.m_http)) {
std::cerr << "out of bounds request header in the alternate" << std::endl;
return zret;
}
@@ -386,12 +386,23 @@ CacheScan::get_alternates(const char *buf, int length)
auto *url = a->m_request_hdr.m_http->u.req.m_url_impl;
if (check_url(doc_mem, url)) {
std::string str;
- ts::bwprint(str, "stripe: {} : {}://{}:{}/{};{}?{}", std::string_view(this->stripe->hashText),
- std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host),
- std::string_view(url->m_ptr_port, url->m_len_port), std::string_view(url->m_ptr_path, url->m_len_path),
- std::string_view(url->m_ptr_params, url->m_len_params), std::string_view(url->m_ptr_query, url->m_len_query));
- std::cout << str << std::endl;
+ if (search) {
+ ts::bwprint(str, "{}://{}:{}/{};{}?{}", std::string_view(url->m_ptr_scheme, url->m_len_scheme),
+ std::string_view(url->m_ptr_host, url->m_len_host), std::string_view(url->m_ptr_port, url->m_len_port),
+ std::string_view(url->m_ptr_path, url->m_len_path), std::string_view(url->m_ptr_params, url->m_len_params),
+ std::string_view(url->m_ptr_query, url->m_len_query));
+ if (u_matcher->match(str.data())) {
+ str = this->stripe->hashText + " " + str;
+ std::cout << "match found " << str << std::endl;
+ }
+ } else {
+ ts::bwprint(str, "stripe: {} : {}://{}:{}/{};{}?{}", std::string_view(this->stripe->hashText),
+ std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host),
+ std::string_view(url->m_ptr_port, url->m_len_port), std::string_view(url->m_ptr_path, url->m_len_path),
+ std::string_view(url->m_ptr_params, url->m_len_params), std::string_view(url->m_ptr_query, url->m_len_query));
+ std::cout << str << std::endl;
+ }
} else {
std::cerr << "The retrieved url object is invalid" << std::endl;
}
diff --git a/src/traffic_cache_tool/CacheScan.h b/src/traffic_cache_tool/CacheScan.h
index 2a359fb..3a7cdff 100644
--- a/src/traffic_cache_tool/CacheScan.h
+++ b/src/traffic_cache_tool/CacheScan.h
@@ -40,12 +40,19 @@ namespace ct
class CacheScan
{
Stripe *stripe;
+ url_matcher *u_matcher;
public:
- CacheScan(Stripe *str) : stripe(str){};
+ CacheScan(Stripe *str, ts::FilePath const &path) : stripe(str)
+ {
+ if (path.has_path()) {
+ u_matcher = new url_matcher(path);
+ }
+ };
+ CacheScan(Stripe *str) : stripe(str) {}
+ Errata Scan(bool search = false);
+ Errata get_alternates(const char *buf, int length, bool search);
int unmarshal(HdrHeap *hh, int buf_length, int obj_type, HdrHeapObjImpl **found_obj, RefCountObj *block_ref);
- Errata Scan();
- Errata get_alternates(const char *buf, int length);
Errata unmarshal(char *buf, int len, RefCountObj *block_ref);
Errata unmarshal(HTTPHdrImpl *obj, intptr_t offset);
Errata unmarshal(URLImpl *obj, intptr_t offset);
diff --git a/src/traffic_cache_tool/CacheTool.cc b/src/traffic_cache_tool/CacheTool.cc
index 7b1f4cb..c59f355 100644
--- a/src/traffic_cache_tool/CacheTool.cc
+++ b/src/traffic_cache_tool/CacheTool.cc
@@ -749,6 +749,7 @@ Span::loadDevice()
}
_len = _header->num_blocks;
} else {
+ zret = Errata::Message(0, 0, _path, " header is uninitialized or invalid");
std::cout << "Span: " << _path << " header is uninitialized or invalid" << std::endl;
_len = round_down(_geometry.totalsz) - _base;
}
@@ -1351,26 +1352,35 @@ Get_Response(FilePath const &input_file_path)
return zret;
}
-void static scan_span(Span *span)
+void static scan_span(Span *span, ts::FilePath const &regex_path)
{
for (auto strp : span->_stripes) {
strp->loadMeta();
strp->loadDir();
- CacheScan cs(strp);
- cs.Scan();
+
+ if (regex_path.has_path()) {
+ CacheScan cs(strp, regex_path);
+ cs.Scan(true);
+ } else {
+ CacheScan cs(strp);
+ cs.Scan(false);
+ }
}
}
Errata
-Scan_Cache()
+Scan_Cache(ts::FilePath const &regex_path)
{
Errata zret;
Cache cache;
std::vector<std::thread> threadPool;
if ((zret = cache.loadSpan(SpanFile))) {
+ if (zret.size()) {
+ return zret;
+ }
cache.dumpSpans(Cache::SpanDumpDepth::SPAN);
for (auto sp : cache._spans) {
- threadPool.emplace_back(scan_span, sp); // move constructor is necessary since std::thread is non copyable
+ threadPool.emplace_back(scan_span, sp, regex_path);
}
for (auto &th : threadPool)
th.join();
@@ -1443,7 +1453,7 @@ main(int argc, char *argv[])
Commands.add(std::string("init"), std::string(" Initializes uninitialized span"),
[&](int, char *argv[]) { return Init_disk(input_url_file); });
Commands.add(std::string("scan"), std::string(" Scans the whole cache and lists the urls of the cached contents"),
- [&](int, char *argv[]) { return Scan_Cache(); });
+ [&](int, char *argv[]) { return Scan_Cache(input_url_file); });
Commands.setArgIndex(optind);
if (help) {