You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2018/08/09 22:43:55 UTC

[trafficserver] 02/05: logstats conditionally disable format check

This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 8.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git

commit d8e2a401887dbe0b8ac31e914a25ebe664925424
Author: Gancho Tenev <ga...@apache.org>
AuthorDate: Thu Aug 9 10:46:52 2018 -0700

    logstats conditionally disable format check
    
    Don’t validate the log format field names against the squid log format.
    This allows squid format fields to be replaced, e.g. replacing the username
    of the authenticated client (caun) with an arbitrary header value via cqh,
    or removing the client’s host IP address from the log for privacy reasons.
    
    Added command line option --no_format_check (default false) and some documentation.
    
    Related to https://issues.apache.org/jira/browse/TS-5069
    
    (cherry picked from commit 63cc2b0db8c40409e21a26dc9d55462828657099)
---
 .../command-line/traffic_logstats.en.rst           | 46 ++++++++++++++++++++++
 src/traffic_logstats/logstats.cc                   | 23 ++++++-----
 2 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/doc/appendices/command-line/traffic_logstats.en.rst b/doc/appendices/command-line/traffic_logstats.en.rst
index dfd4153..481c3d2 100644
--- a/doc/appendices/command-line/traffic_logstats.en.rst
+++ b/doc/appendices/command-line/traffic_logstats.en.rst
@@ -52,40 +52,86 @@ Options
 
 .. option:: -f FILE, --log_file FILE
 
+   Specific logfile to parse
+
 .. option:: -o LIST, --origin_list LIST
 
+   Only show stats for listed Origins
+
 .. option:: -O FILE, --origin_file FILE
 
+   File listing Origins to show
+
 .. option:: -M COUNT, --max_origins COUNT
 
+   Max number of Origins to show
+
 .. option:: -u COUNT, --urls COUNT
 
+   Produce JSON stats for URLs, argument is LRU size
+
 .. option:: -U COUNT, --show_urls COUNT
 
+   Only show max this number of URLs
+
 .. option:: -A, --as_object
 
+   Produce URL stats as a JSON object instead of array
+
 .. option:: -C, --concise
 
+   Eliminate metrics that can be inferred from other values
+
 .. option:: -i, --incremental
 
+   Incremental log parsing
+
 .. option:: -S FILE, --statetag FILE
 
+   Name of the state file to use
+
 .. option:: -t, --tail
 
+   Parse the last <sec> seconds of log
+
 .. option:: -s, --summary
 
+   Only produce the summary
+
 .. option:: -j, --json
 
+   Produce JSON formatted output
+
 .. option:: -c, --cgi
 
+   Produce HTTP headers suitable as a CGI
+
 .. option:: -m, --min_hits
 
+   Minimum total hits for an Origin
+
 .. option:: -a, --max_age
 
+   Max age for log entries to be considered
+
 .. option:: -l COUNT, --line_len COUNT
 
+   Output line length
+
 .. option:: -T TAGS, --debug_tags TAGS
 
+   Colon-Separated Debug Tags
+
+.. option:: -r, --report_per_user
+
+   Report stats per username of the authenticated client ``caun`` instead of host, see `squid log format <../../admin-guide/logging/examples.en.html#squid>`_
+
+.. option:: -n, --no_format_check
+
+   Don't validate the log format field names against the `squid log format <../../admin-guide/logging/examples.en.html#squid>`_.
+   This allows squid format fields to be replaced, e.g. substituting the username of the authenticated client ``caun`` with an arbitrary header value by using ``cqh``,
+   or removing the client's host IP address from the log for privacy reasons.
+
 .. option:: -h, --help
 
    Print usage information and exit.
diff --git a/src/traffic_logstats/logstats.cc b/src/traffic_logstats/logstats.cc
index bae5ef1..44cbb1e 100644
--- a/src/traffic_logstats/logstats.cc
+++ b/src/traffic_logstats/logstats.cc
@@ -614,6 +614,7 @@ struct CommandLineArgs {
   int as_object;       // Show the URL stats as a single JSON object (not array)
   int concise;         // Eliminate metrics that can be inferred by other values
   int report_per_user; // A flag to aggregate and report stats per user instead of per host if 'true' (default 'false')
+  int no_format_check; // A flag to skip the log format check if any of the fields is not a standard squid log format field.
 
   CommandLineArgs()
     : max_origins(0),
@@ -629,7 +630,8 @@ struct CommandLineArgs {
       show_urls(0),
       as_object(0),
       concise(0),
-      report_per_user(0)
+      report_per_user(0),
+      no_format_check(0)
   {
     log_file[0]    = '\0';
     origin_file[0] = '\0';
@@ -662,6 +664,7 @@ static ArgumentDescription argument_descriptions[] = {
   {"line_len", 'l', "Output line length", "I", &cl.line_len, nullptr, nullptr},
   {"debug_tags", 'T', "Colon-Separated Debug Tags", "S1023", &error_tags, nullptr, nullptr},
   {"report_per_user", 'r', "Report stats per user instead of host", "T", &cl.report_per_user, nullptr, nullptr},
+  {"no_format_check", 'n', "Don't validate the log format field names", "T", &cl.no_format_check, nullptr, nullptr},
   HELP_ARGUMENT_DESCRIPTION(),
   VERSION_ARGUMENT_DESCRIPTION(),
   RUNROOT_ARGUMENT_DESCRIPTION()};
@@ -1277,15 +1280,17 @@ parse_log_buff(LogBufferHeader *buf_header, bool summary = false, bool aggregate
     LogFormat::parse_symbol_string(buf_header->fmt_fieldlist(), fieldlist, &agg);
   }
 
-  // Validate the fieldlist
-  field                                = fieldlist->first();
-  const std::string_view test_fields[] = {"cqtq", "ttms", "chi", "crc", "pssc", "psql", "cqhm", "cquc", "caun", "phr", "shn"};
-  for (auto i : test_fields) {
-    if (i != field->symbol()) {
-      cerr << "Error parsing log file - expected field: " << i << ", but read field: " << field->symbol() << endl;
-      return 1;
+  if (!cl.no_format_check) {
+    // Validate the fieldlist
+    field                                = fieldlist->first();
+    const std::string_view test_fields[] = {"cqtq", "ttms", "chi", "crc", "pssc", "psql", "cqhm", "cquc", "caun", "phr", "shn"};
+    for (auto i : test_fields) {
+      if (i != field->symbol()) {
+        cerr << "Error parsing log file - expected field: " << i << ", but read field: " << field->symbol() << endl;
+        return 1;
+      }
+      field = fieldlist->next(field);
     }
-    field = fieldlist->next(field);
   }
 
   // Loop over all entries