Posted to commits@qpid.apache.org by ch...@apache.org on 2018/11/27 16:21:37 UTC
qpid-dispatch git commit: DISPATCH-1191: Fixes based on real-world large log files
Repository: qpid-dispatch
Updated Branches:
refs/heads/master 5c3411a1a -> 75c9763a9
DISPATCH-1191: Fixes based on real-world large log files
* Add a large-file splitter to break files into manageable chunks
* Fix Attach parser to accept QpidJMS nested described types and PN_SYMBOL
* Add command line argparse
** Select split or merge program mode
** Disable over-the-top analysis to reduce output on large input files
* Remove file split.py to avoid confusion with log_split.py
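As a quick orientation, a minimal sketch of how the two program modes might be driven from Python. The log file names are hypothetical; the flags are the argparse options added in main.py below, and both modes write their web page to stdout.

    import subprocess, sys

    # split mode: carve one large log into per-connection files
    with open('split-index.html', 'w') as out:
        subprocess.check_call([sys.executable, 'bin/log_scraper/main.py',
                               '--split', '-f', 'big-router.log'], stdout=out)

    # normal analysis with load shedding for very large inputs
    with open('analysis.html', 'w') as out:
        subprocess.check_call([sys.executable, 'bin/log_scraper/main.py',
                               '--skip-detail', '--skip-msg-progress',
                               '-f', 'A.log', 'B.log'], stdout=out)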
Project: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/repo
Commit: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/commit/75c9763a
Tree: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/tree/75c9763a
Diff: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/diff/75c9763a
Branch: refs/heads/master
Commit: 75c9763a9cf28dacd87bec84bfd7d0e9748f6dca
Parents: 5c3411a
Author: Chuck Rolke <cr...@redhat.com>
Authored: Tue Nov 27 11:14:27 2018 -0500
Committer: Chuck Rolke <cr...@redhat.com>
Committed: Tue Nov 27 11:14:27 2018 -0500
----------------------------------------------------------------------
bin/log_scraper/common.py | 13 +-
bin/log_scraper/log_splitter.py | 426 +++++++++++++++++++++++++++
bin/log_scraper/main.py | 57 ++--
bin/log_scraper/parser.py | 118 +++++++-
bin/log_scraper/splitter.py | 94 ------
bin/log_scraper/test_data/test_data.txt | 2 +
6 files changed, 581 insertions(+), 129 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/common.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/common.py b/bin/log_scraper/common.py
index d570024..0a74f3c 100755
--- a/bin/log_scraper/common.py
+++ b/bin/log_scraper/common.py
@@ -40,11 +40,14 @@ else:
class Common():
- # arg - index transfer data or not
- # If a log file has 100M transfers then adverbl dies.
- # With program arg --no-data then data indexing is turned off but
- # the output still shows connections, links, and link state costs.
- arg_index_data = True
+ # analysis_level_ludicrous
+ # Adverbl tries too hard to cross-reference data
+ # Use these switches to turn off some of the biggest offenders
+ per_link_detail = True
+ message_progress_tables = False
+
+ # returned from argparse.parse_args()
+ args = None
# first letter of the connection names
log_char_base = 'A'
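A minimal sketch of how the new Common flags are meant to gate the heaviest reports; the render calls are hypothetical placeholders, and in this commit the actual gating happens in main.py below.

    class Common(object):
        # defaults mirror the diff above
        per_link_detail = True
        message_progress_tables = False
        args = None  # set from argparse.parse_args() in main.py

    comn = Common()
    if comn.per_link_detail:
        print("would emit per-link detail tables")   # placeholder
    if not comn.message_progress_tables:
        print("message progress tables suppressed")  # placeholder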
http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/log_splitter.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/log_splitter.py b/bin/log_scraper/log_splitter.py
new file mode 100755
index 0000000..c83e385
--- /dev/null
+++ b/bin/log_scraper/log_splitter.py
@@ -0,0 +1,426 @@
+#!/usr/bin/env python
+
+# Split a gigantic (or not) log file into files of traffic for each connection.
+# Identify probable router and broker connections, QpidJMS client connections,
+# and AMQP errors. Create lists of connections sorted by log line and by transfer counts.
+# Emit a web page summarizing the results.
+
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from __future__ import print_function
+
+import cgi
+import os
+import sys
+import traceback
+from collections import defaultdict
+
+
+class connection():
+ def __init__(self, instance, conn_id, logfile):
+ self.instance = instance
+ self.conn_id = conn_id
+ self.logfile = logfile
+ self.lines = []
+ self.key_name = connection.keyname(instance, conn_id)
+ self.transfers = 0
+ self.peer_open = ""
+ self.peer_type = ""
+ self.log_n_lines = 0
+ self.log_n_dir = ""
+ self.file_name = ""
+ self.path_name = ""
+
+ @staticmethod
+ def keyname(instance, conn_id):
+ tmp = "0000000" + str(conn_id)
+ return str(instance) + "." + tmp[-8:]
+
+ def disp_name(self):
+ return str(self.instance) + "_" + str(self.conn_id)
+
+ def generate_paths(self):
+ self.log_n_dir = "10e%d" % self.log_n_lines
+ self.file_name = self.disp_name() + ".log"
+ self.path_name = self.log_n_dir + "/" + self.file_name
+
+
+class LogFile:
+ def __init__(self, fn, top_n=24):
+ """
+ Represent connections in a file
+ :param fn: file name
+ :param top_n: how many connections to report
+ """
+ self.log_fn = fn # file name
+ self.top_n = top_n # how many to report
+ self.instance = 0 # incremented when router restarts in log file
+ self.amqp_lines = 0 # server trace lines
+ self.transfers = 0 # server transfers
+
+ # restarts
+ self.restarts = []
+
+ # connections
+ # dictionary of connection data
+ # key = connection id: <instance>.<conn_id> "0.3"
+ # val = connection class object
+ self.connections = {}
+
+ # router_connections
+ # list of received opens that suggest a router at the other end
+ self.router_connections = []
+
+ # broker connections
+ # list of received opens that suggest a broker at the other end
+ self.broker_connections = []
+
+ # errors
+ # amqp errors in time order
+ self.errors = []
+
+ # conns_by_size_transfer
+ # all connections in transfer size descending order
+ self.conns_by_size_transfer = []
+
+ # conns_by_size_loglines
+ # all connections in log_lines size descending order
+ self.conns_by_size_loglines = []
+
+ # histogram - count of connections with N logs < 10^index
+ # [0] = N < 10^0
+ # [1] = N < 10^1
+ self.histogram = [0,0,0,0,0,0,0,0,0,0]
+ self.hist_max = len(self.histogram) - 1
+
+ def parse_identify(self, text, line, before_col=70):
+ """
+ Look for text in line but make sure it's not in the body of some message.
+ :param text:
+ :param line:
+ :param before_col: limit on how far to search into line
+ """
+ st = line.find(text, 0, (before_col + len(text)))
+ if st < 0:
+ return False
+ return st < 70
+
+ def parse_line(self, line):
+ """
+ Do minimum parsing on line.
+ If container name then bump instance value
+ If server trace then get conn_id and add line to connections data
+ :param line:
+ :return:
+ """
+ key_sstart = "SERVER (info) Container Name:" # Normal 'router is starting' restart discovery line
+ key_strace = "SERVER (trace) [" # AMQP traffic
+ key_error = "@error(29)"
+ key_openin = "<- @open(16)"
+ key_xfer = "@transfer"
+ key_prod_dispatch = ':product="qpid-dispatch-router"'
+ key_prod_aartemis = ':product="apache-activemq-artemis"'
+ key_prod_aqpidcpp = ':product="qpid-cpp"'
+ key_prod_aqpidjms = ':product="QpidJMS"'
+
+ if self.parse_identify(key_sstart, line):
+ self.instance += 1
+ self.restarts.append(line)
+ else:
+ if self.parse_identify(key_strace, line):
+ self.amqp_lines += 1
+ idx = line.find(key_strace)
+ idx += len(key_strace)
+ eidx = line.find("]", idx + 1)
+ conn_id = line[idx:eidx]
+ keyname = connection.keyname(self.instance, conn_id)
+ if keyname not in self.connections:
+ self.connections[keyname] = connection(self.instance, conn_id, self)
+ curr_conn = self.connections[keyname]
+ curr_conn.lines.append(line)
+ # router hint
+ if key_openin in line:
+ # inbound open
+ if key_prod_dispatch in line:
+ self.router_connections.append(curr_conn)
+ curr_conn.peer_open = line
+ curr_conn.peer_type = key_prod_dispatch
+ elif key_prod_aqpidjms in line:
+ curr_conn.peer_type = key_prod_aqpidjms
+ else:
+ for k in [key_prod_aartemis, key_prod_aqpidcpp]:
+ if k in line:
+ self.broker_connections.append(curr_conn)
+ curr_conn.peer_open = line
+ curr_conn.peer_type = k
+ elif self.parse_identify(key_xfer, line):
+ self.transfers += 1
+ curr_conn.transfers += 1
+ if key_error in line:
+ self.errors.append(line)
+
+ def log_of(self, x):
+ """
+ calculate nearest power of 10 > x
+ :param x:
+ :return:
+ """
+ for i in range(self.hist_max):
+ if x < 10 ** i:
+ return i
+ return self.hist_max
+
+ def sort_sizes(self, sortfunc1, sortfunc2):
+ smap = defaultdict(list)
+ conns_by_size = []
+ # create size map. index is size, list holds all connections of that many transfers
+ for k, v in dict_iteritems(self.connections):
+ smap[str(sortfunc1(v))].append(v)
+ # create a sorted list of sizes in sizemap
+ sl = list(dict_iterkeys(smap))
+ sli = [int(k) for k in sl]
+ slist = sorted(sli, reverse=True)
+ # create grand list of all connections
+ for cursize in slist:
+ lsm = smap[str(cursize)]
+ lsm = sorted(lsm, key = sortfunc2, reverse=True)
+ #lsm = sorted(lsm, key = lambda x: int(x.conn_id))
+ for ls in lsm:
+ conns_by_size.append(ls)
+ return conns_by_size
+
+
+ def summarize_connections(self):
+ # sort connections based on transfer count and on n log lines
+ self.conns_by_size_transfer = self.sort_sizes(lambda x: x.transfers, lambda x: len(x.lines))
+ self.conns_by_size_loglines = self.sort_sizes(lambda x: len(x.lines), lambda x: x.transfers)
+
+ # compute log_n and file name facts for all connections
+ for k, v in dict_iteritems(self.connections):
+ v.log_n_lines = self.log_of(len(v.lines))
+ v.generate_paths()
+
+ # Write the web doc to stdout
+ print ("""<!DOCTYPE html>
+ <html>
+ <head>
+ <title>%s qpid-dispatch log split</title>
+
+ <style>
+ * {
+ font-family: sans-serif;
+ }
+ table {
+ border-collapse: collapse;
+ }
+ table, td, th {
+ border: 1px solid black;
+ padding: 3px;
+ }
+ </style>
+""" % self.log_fn)
+
+ print("""
+<h3>Contents</h3>
+<table>
+<tr> <th>Section</th> <th>Description</th> </tr>
+<tr><td><a href=\"#c_summary\" >Summary</a></td> <td>Summary</td></tr>
+<tr><td><a href=\"#c_restarts\" >Router restarts</a></td> <td>Router reboot records</td></tr>
+<tr><td><a href=\"#c_router_conn\" >Interrouter connections</a></td> <td>Probable interrouter connections</td></tr>
+<tr><td><a href=\"#c_broker_conn\" >Broker connections</a></td> <td>Probable broker connections</td></tr>
+<tr><td><a href=\"#c_errors\" >AMQP errors</a></td> <td>AMQP errors</td></tr>
+<tr><td><a href=\"#c_conn_xfersize\" >Conn by N transfers</a></td> <td>Connections sorted by transfer log count</td></tr>
+<tr><td><a href=\"#c_conn_xfer0\" >Conn with no transfers</a></td> <td>Connections with no transfers</td></tr>
+<tr><td><a href=\"#c_conn_logsize\" >Conn by N log lines</a></td> <td>Connections sorted by total log line count</td></tr>
+</table>
+<hr>
+""")
+ print("<a name=\"c_summary\"></a>")
+ print("<table>")
+ print("<tr><th>Statistic</th> <th>Value</th></tr>")
+ print("<tr><td>File</td> <td>%s</td></tr>" % self.log_fn)
+ print("<tr><td>Router starts</td> <td>%s</td></tr>" % str(self.instance))
+ print("<tr><td>Connections</td> <td>%s</td></tr>" % str(len(self.connections)))
+ print("<tr><td>Router connections</td> <td>%s</td></tr>" % str(len(self.router_connections)))
+ print("<tr><td>AMQP log lines</td> <td>%s</td></tr>" % str(self.amqp_lines))
+ print("<tr><td>AMQP errors</td> <td>%s</td></tr>" % str(len(self.errors)))
+ print("<tr><td>AMQP transfers</td> <td>%s</td></tr>" % str(self.transfers))
+ print("</table>")
+ print("<hr>")
+
+ # Restarts
+ print("<a name=\"c_restarts\"></a>")
+ print("<h3>Restarts</h3>")
+ for i in range(1, (self.instance + 1)):
+ rr = self.restarts[i-1]
+ print("(%d) - %s<br>" % (i, rr), end='')
+ print("<hr>")
+
+ # interrouter connections
+ print("<a name=\"c_router_conn\"></a>")
+ print("<h3>Probable inter-router connections (N=%d)</h3>" % (len(self.router_connections)))
+ print("<table>")
+ print("<tr><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>AMQP Open<th></tr>")
+ for rc in self.router_connections:
+ print("<tr><td><a href=\"%s/%s\">%s</a></td><td>%d</td><td>%d</td><td>%s</td></tr>" %
+ (rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+ cgi.escape(rc.peer_open)))
+ print("</table>")
+ print("<hr>")
+
+ # broker connections
+ print("<a name=\"c_broker_conn\"></a>")
+ print("<h3>Probable broker connections (N=%d)</h3>" % (len(self.broker_connections)))
+ print("<table>")
+ print("<tr><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>AMQP Open<th></tr>")
+ for rc in self.broker_connections:
+ print("<tr><td><a href=\"%s/%s\">%s</a></td><td>%d</td><td>%d</td><td>%s</td></tr>" %
+ (rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+ cgi.escape(rc.peer_open)))
+ print("</table>")
+ print("<hr>")
+
+ ## histogram
+ #for cursize in self.sizelist:
+ # self.histogram[self.log_of(cursize)] += len(self.sizemap[str(cursize)])
+ #print()
+ #print("Log lines per connection distribution")
+ #for i in range(1, self.hist_max):
+ # print("N < 10e%d : %d" %(i, self.histogram[i]))
+ #print("N >= 10e%d : %d" % ((self.hist_max - 1), self.histogram[self.hist_max]))
+
+ # errors
+ print("<a name=\"c_errors\"></a>")
+ print("<h3>AMQP errors (N=%d)</h3>" % (len(self.errors)))
+ print("<table>")
+ print("<tr><th>N</th> <th>AMQP error</th></tr>")
+ for i in range(len(self.errors)):
+ print("<tr><td>%d</td> <td>%s</td></tr>" % (i, cgi.escape(self.errors[i].strip())))
+ print("</table>")
+ print("<hr>")
+
+ def odir(self):
+ return os.path.join(os.getcwd(), (self.log_fn + ".splits"))
+
+ def write_subfiles(self):
+ # Q: Where to put the generated files? A: odir
+ odir = self.odir()
+ odirs = ['dummy'] # dirs indexed by log of n-lines
+
+ os.makedirs(odir)
+ for i in range(1, self.hist_max):
+ nrange = ("10e%d" % (i))
+ ndir = os.path.join(odir, nrange)
+ os.makedirs(ndir)
+ odirs.append(ndir)
+
+ for k, c in dict_iteritems(self.connections):
+ cdir = odirs[self.log_of(len(c.lines))]
+ opath = os.path.join(cdir, (c.disp_name() + ".log"))
+ with open(opath, 'w') as f:
+ for l in c.lines:
+ f.write(l)
+
+ xfer0 = 0
+ for rc in self.conns_by_size_transfer:
+ if rc.transfers == 0:
+ xfer0 += 1
+ print("<a name=\"c_conn_xfersize\"></a>")
+ print("<h3>Connections by transfer count (N=%d)</h3>" % (len(self.conns_by_size_transfer) - xfer0))
+ print("<table>")
+ n = 1
+ print("<tr><th>N</th><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>Type</th> <th>AMQP detail<th></tr>")
+ for rc in self.conns_by_size_transfer:
+ if rc.transfers > 0:
+ print("<tr><td>%d</td><td><a href=\"%s/%s\">%s</a></td> <td>%d</td> <td>%d</td> <td>%s</td> <td>%s</td></tr>" %
+ (n, rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+ rc.peer_type, cgi.escape(rc.peer_open)))
+ n += 1
+ print("</table>")
+ print("<hr>")
+
+ print("<a name=\"c_conn_xfer0\"></a>")
+ print("<h3>Connections with no AMQP transfers (N=%d)</h3>" % (xfer0))
+ print("<table>")
+ n = 1
+ print("<tr><th>N</th><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>Type</th> <th>AMQP detail<th></tr>")
+ for rc in self.conns_by_size_transfer:
+ if rc.transfers == 0:
+ print("<tr><td>%d</td><td><a href=\"%s/%s\">%s</a></td> <td>%d</td> <td>%d</td> <td>%s</td> <td>%s</td></tr>" %
+ (n, rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+ rc.peer_type, cgi.escape(rc.peer_open)))
+ n += 1
+ print("</table>")
+ print("<hr>")
+
+ print("<a name=\"c_conn_logsize\"></a>")
+ print("<h3>Connections by total log line count (N=%d)</h3>" % (len(self.conns_by_size_loglines)))
+ print("<table>")
+ n = 1
+ print("<tr><th>N</th><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>Type</th> <th>AMQP detail<th></tr>")
+ for rc in self.conns_by_size_loglines:
+ print("<tr><td>%d</td><td><a href=\"%s/%s\">%s</a></td> <td>%d</td> <td>%d</td> <td>%s</td> <td>%s</td></tr>" %
+ (n, rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+ rc.peer_type, cgi.escape(rc.peer_open)))
+ n += 1
+ print("</table>")
+ print("<hr>")
+
+
+# py 2-3 compat
+
+IS_PY2 = sys.version_info[0] == 2
+
+if IS_PY2:
+ def dict_iteritems(d):
+ return d.iteritems()
+ def dict_iterkeys(d):
+ return d.iterkeys()
+else:
+ def dict_iteritems(d):
+ return iter(d.items())
+ def dict_iterkeys(d):
+ return iter(d.keys())
+
+
+#
+#
+def main_except(log_fn):
+ """
+ Given a log file name, split the file into per-connection sub files
+ """
+ log_files = []
+
+ if not os.path.exists(log_fn):
+ sys.exit('ERROR: log file %s was not found!' % log_fn)
+
+ # parse the log file
+ with open(log_fn, 'r') as infile:
+ lf = LogFile(log_fn)
+ odir = lf.odir()
+ if os.path.exists(odir):
+ sys.exit('ERROR: output directory %s exists' % odir)
+ log_files.append(lf)
+ for line in infile:
+ lf.parse_line(line)
+
+ # write output
+ for lf in log_files:
+ lf.summarize_connections() # prints web page to console
+ lf.write_subfiles() # generates split files one-per-connection
+ pass
+
+def main(argv):
+ try:
+ if len(argv) != 2:
+ sys.exit('Usage: %s log-file-name' % argv[0])
+ main_except(argv[1])
+ return 0
+ except Exception as e:
+ traceback.print_exc()
+ return 1
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv))
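Two helpers above carry most of the layout logic: keyname() zero-pads connection ids so lexicographic order matches numeric order, and log_of() buckets connections by the nearest power of ten for the output directories. A self-contained sketch mirroring the code above:

    def keyname(instance, conn_id):
        # zero-pad so '0.00000003' sorts before '0.00000012'
        tmp = "0000000" + str(conn_id)
        return str(instance) + "." + tmp[-8:]

    def log_of(x, hist_max=9):
        # smallest i with x < 10**i, capped at hist_max
        for i in range(hist_max):
            if x < 10 ** i:
                return i
        return hist_max

    assert keyname(0, 3) == "0.00000003"
    assert keyname(1, 123) == "1.00000123"
    assert log_of(9) == 1       # connections with < 10 lines land in dir 10e1
    assert log_of(5000) == 4    # < 10**4 lines land in dir 10e4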
http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/main.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/main.py b/bin/log_scraper/main.py
index a0d4b40..a44039f 100755
--- a/bin/log_scraper/main.py
+++ b/bin/log_scraper/main.py
@@ -33,6 +33,7 @@ from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
+import argparse
import ast
import cgi
import os
@@ -40,6 +41,7 @@ import sys
import traceback
import common
+from log_splitter import main_except as splitter_main
import parser
import router
import text
@@ -77,23 +79,38 @@ def show_noteworthy_line(plf, comn):
#
#
def main_except(argv):
- """
- Given a list of log file names, send the javascript web page to stdout
- """
- if len(argv) < 2:
- sys.exit('Usage: %s [--no-data] log-file-name [log-file-name ...]' % argv[0])
-
# Instantiate a common block
comn = common.Common()
- # optparse - look for --no-data switch
- if argv[1] == "--no-data":
- comn.arg_index_data = False
- del argv[1]
+ # argparse - look for data-inhibiting switches and program mode control
+ p = argparse.ArgumentParser()
+ p.add_argument('--skip-all-data',
+ action='store_true',
+ help='Max load shedding: do not store/index transfer, disposition, flow or EMPTY_FRAME data')
+ p.add_argument('--skip-detail',
+ action='store_true',
+ help='Load shedding: do not produce Connection Details tables')
+ p.add_argument('--skip-msg-progress',
+ action='store_true',
+ help='Load shedding: do not produce Message Progress tables')
+ p.add_argument('--split',
+ action='store_true',
+ help='A file is split into per-connection data. Normal processing is not performed.')
+ p.add_argument('--files', '-f', nargs="+")
+
+ del argv[0]
+ comn.args = p.parse_args(argv)
+
+ # process split function
+ if comn.args.split:
+ # Split processes only a single file
+ if len(comn.args.files) > 1:
+ sys.exit('--split mode takes only one file name')
+ return splitter_main(comn.args.files[0])
# process the log files and add the results to router_array
- for log_i in range(0, len(sys.argv) - 1):
- arg_log_file = sys.argv[log_i + 1]
+ for log_i in range(len(comn.args.files)):
+ arg_log_file = comn.args.files[log_i]
comn.log_fns.append(arg_log_file)
comn.n_logs += 1
@@ -255,8 +272,8 @@ def main_except(argv):
print(text.web_page_toc())
# Report how much data was skipped if --no-data switch in effect
- if not comn.arg_index_data:
- print("--no-data switch in effect. %d log lines skipped" % comn.data_skipped)
+ if comn.args.skip_all_data:
+ print("--skip-all-data switch is in effect. %d log lines skipped" % comn.data_skipped)
print("<p><hr>")
# file(s) included in this doc
@@ -398,9 +415,12 @@ def main_except(argv):
# connection details
print("<a name=\"c_conndetails\"></a>")
print("<h3>Connection Details</h3>")
- for rtrlist in comn.routers:
- for rtr in rtrlist:
- rtr.details.show_html()
+ if not comn.args.skip_detail:
+ for rtrlist in comn.routers:
+ for rtr in rtrlist:
+ rtr.details.show_html()
+ else:
+ print ("details suppressed<br>")
print("<hr>")
# noteworthy log lines: highlight errors and stuff
@@ -522,7 +542,8 @@ def main_except(argv):
# data traversing network
print("<a name=\"c_messageprogress\"></a>")
print("<h3>Message progress</h3>")
- for i in range(0, comn.shorteners.short_data_names.len()):
+ if not comn.args.skip_msg_progress:
+ for i in range(0, comn.shorteners.short_data_names.len()):
sname = comn.shorteners.short_data_names.shortname(i)
size = 0
for plf in tree:
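The mode selection itself is plain argparse; a minimal standalone sketch of the flag handling added above, with hypothetical file names:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('--skip-all-data', action='store_true')
    p.add_argument('--skip-detail', action='store_true')
    p.add_argument('--skip-msg-progress', action='store_true')
    p.add_argument('--split', action='store_true')
    p.add_argument('--files', '-f', nargs='+')

    args = p.parse_args(['--split', '-f', 'big-router.log'])
    assert args.split and args.files == ['big-router.log']

    args = p.parse_args(['--skip-all-data', '-f', 'A.log', 'B.log'])
    assert args.skip_all_data and not args.split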
http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/parser.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/parser.py b/bin/log_scraper/parser.py
index 44c3e91..05e8ae3 100755
--- a/bin/log_scraper/parser.py
+++ b/bin/log_scraper/parser.py
@@ -29,7 +29,6 @@ import re
import sys
import traceback
-import splitter
import test_data as td
import common
import text
@@ -41,6 +40,55 @@ def colorize_bg(what):
return what
+def proton_split(line):
+ """
+ Split a log line into fields.
+ * allow commas and spaces in quoted strings.
+ * split on ', ' and on ' '.
+ strip trailing commas between fields.
+ * quoted fields must have both quotes
+ :param line:
+ :return:
+ """
+ result = []
+ indqs = False
+ pending_comma = False
+ res = ""
+ for i in range(len(line)):
+ c = line[i]
+ if c == '\"':
+ if pending_comma:
+ res += ','
+ pending_comma = False
+ indqs = not indqs
+ res += c
+ elif c == ',':
+ if pending_comma:
+ res += c
+ pending_comma = True
+ elif c == ' ':
+ if indqs:
+ if pending_comma:
+ res += ','
+ pending_comma = False
+ res += c
+ else:
+ if res != '':
+ if pending_comma:
+ pending_comma = False
+ result.append(res)
+ res = ''
+ else:
+ res += c
+ if res != '':
+ result.append(str(res))
+ if indqs:
+ raise ValueError("SPLIT ODD QUOTES: %s", line)
+ # print ("SPLIT: line: %s" % line)
+ # print ("SPLIT: flds: %s" % result)
+ return result
+
+
class LogLineData:
def direction_is_in(self):
@@ -227,7 +275,7 @@ class DescribedType:
self.line = self.line[:-1]
# process fields
- fields = splitter.Splitter.split(self.line)
+ fields = proton_split(self.line)
while len(fields) > 0 and len(fields[0]) > 0:
if '=' not in fields[0]:
raise ValueError("Field does not contain equal sign '%s'" % fields[0])
@@ -243,23 +291,43 @@ class DescribedType:
subfields.append("[]")
del fields[0]
else:
+ # While extracting this type's fields, include nested described types
+ # and PN_SYMBOL data enclosed in brackets. Current type ends when close
+ # bracket seen and nest level is zero.
+ nest = 0
while len(fields) > 0:
- if fields[0].endswith('],'):
- subfields.append(fields[0][:-2])
- subfields.append(']')
- del fields[0]
- break
- if fields[0].endswith(']'):
- subfields.append(fields[0][:-1])
- subfields.append(']')
+ if "=@" in fields[0] and "]" not in fields[0] and "=@:" not in fields[0]:
+ nest += 1
+ if nest == 0:
+ if fields[0].endswith('],'):
+ subfields.append(fields[0][:-2])
+ subfields.append(']')
+ del fields[0]
+ break
+ if fields[0].endswith(']'):
+ subfields.append(fields[0][:-1])
+ subfields.append(']')
+ del fields[0]
+ break
+ elif fields[0].endswith('],') or fields[0].endswith(']'):
+ nest -= 1
+ if fields[0].endswith(']]'):
+ subfields.append(fields[0])
del fields[0]
break
subfields.append(fields[0])
del fields[0]
+
subtype = DescribedType()
subtype.parse_dtype_line(val, ' '.join(subfields))
self.dict[key] = subtype
+ elif val.startswith("@PN_SYMBOL"):
+ # symbols may end in first field or some later field
+ while not val.endswith(']'):
+ val += fields[0]
+ del fields[0]
+ self.dict[key] = val
elif val.startswith('{'):
# handle some embedded map: properties={:product=\"qpid-dispatch-router\", :version=\"1.3.0-SNAPSHOT\"}
# pull subtype's data out of fields. The fields list belongs to parent.
@@ -717,7 +785,20 @@ class ParsedLogLine(object):
try:
self.datetime = datetime.strptime(self.line[:26], '%Y-%m-%d %H:%M:%S.%f')
except:
- self.datetime = datetime(1970, 1, 1)
+ # old routers flub the timestamp and don't print leading zero in uS time
+ # 2018-11-18 11:31:08.269 should be 2018-11-18 11:31:08.000269
+ td = self.line[:26]
+ parts = td.split('.')
+ us = parts[1]
+ parts_us = us.split(' ')
+ if len(parts_us[0]) < 6:
+ parts_us[0] = '0' * (6 - len(parts_us[0])) + parts_us[0]
+ parts[1] = ' '.join(parts_us)
+ td = '.'.join(parts)
+ try:
+ self.datetime = datetime.strptime(td[:26], '%Y-%m-%d %H:%M:%S.%f')
+ except:
+ self.datetime = datetime(1970, 1, 1)
# extract connection number
sti = self.line.find(self.server_trace_key)
@@ -868,7 +949,7 @@ def parse_log_file(fn, log_index, comn):
try:
if lineno == 130:
pass
- do_this = comn.arg_index_data
+ do_this = True if not hasattr(comn.args, 'skip_all_data') else not comn.args.skip_all_data
if not do_this:
# not indexing data. maybe do this line anyway
do_this = not any(s in line for s in [' @transfer', ' @disposition', ' @flow', 'EMPTY FRAME'])
@@ -894,7 +975,19 @@ def parse_log_file(fn, log_index, comn):
if __name__ == "__main__":
+ print("Line-by-line split test")
+ try:
+ for line in td.TestData().data():
+ if "transfer" not in line:
+ print(proton_split(line))
+ else:
+ pass # splitter does not split transfers
+ pass
+ except:
+ traceback.print_exc(file=sys.stdout)
+ pass
+ print("Canned data parse test")
data = td.TestData().data()
log_index = 0 # from file for router A
instance = 0 # all from router instance 0
@@ -908,6 +1001,7 @@ if __name__ == "__main__":
traceback.print_exc(file=sys.stdout)
pass
+ print("Read two-instance file test")
comn2 = common.Common()
routers = parse_log_file('test_data/A-two-instances.log', 0, comn2)
if len(routers) != 2:
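The timestamp repair deserves a note: strptime's %f would happily parse '.269' as 269000 microseconds, but the stray timezone characters inside line[:26] make the first strptime fail, which routes short microsecond fields into the padding branch. A self-contained sketch of the same logic:

    from datetime import datetime

    def parse_ts(line):
        # old routers print '.269' meaning 269 us, not 269000 us
        td = line[:26]
        try:
            return datetime.strptime(td, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            secs, rest = td.split('.', 1)
            us = rest.split(' ')
            us[0] = us[0].zfill(6)   # '269' -> '000269'
            td = secs + '.' + ' '.join(us)
            return datetime.strptime(td[:26], '%Y-%m-%d %H:%M:%S.%f')

    t = parse_ts('2018-11-18 11:31:08.269 -0500 SERVER (trace) [255]:2 ...')
    assert t.microsecond == 269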
http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/splitter.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/splitter.py b/bin/log_scraper/splitter.py
deleted file mode 100755
index a0353f7..0000000
--- a/bin/log_scraper/splitter.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from __future__ import print_function
-
-import traceback
-import test_data as td
-
-
-class Splitter():
- @staticmethod
- def split(line):
- """
- Split a log line into fields.
- * allow commas and spaces in quoted strings.
- * split on ', ' and on ' '.
- strip trailing commas between fields.
- * quoted fields must have both quotes
- :param line:
- :return:
- """
- result = []
- indqs = False
- pending_comma = False
- res = ""
- for i in range(len(line)):
- c = line[i]
- if c == '\"':
- if pending_comma:
- res += ','
- pending_comma = False
- indqs = not indqs
- res += c
- elif c == ',':
- if pending_comma:
- res += c
- pending_comma = True
- elif c == ' ':
- if indqs:
- if pending_comma:
- res += ','
- pending_comma = False
- res += c
- else:
- if res != '':
- if pending_comma:
- pending_comma = False
- result.append(res)
- res = ''
- else:
- res += c
- if res != '':
- result.append(str(res))
- if indqs:
- raise ValueError("SPLIT ODD QUOTES: %s", line)
- # print ("SPLIT: line: %s" % line)
- # print ("SPLIT: flds: %s" % result)
- return result
-
-
-if __name__ == "__main__":
-
- try:
- for line in td.TestData().data():
- if "transfer" not in line:
- print(Splitter.split(line))
- print()
- else:
- pass # splitter does not split transfers
- pass
- except:
- traceback.print_exc(file=sys.stdout)
- pass
http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/test_data/test_data.txt
----------------------------------------------------------------------
diff --git a/bin/log_scraper/test_data/test_data.txt b/bin/log_scraper/test_data/test_data.txt
index 6c91258..f659a34 100644
--- a/bin/log_scraper/test_data/test_data.txt
+++ b/bin/log_scraper/test_data/test_data.txt
@@ -1,3 +1,5 @@
+2018-11-21 15:47:09.727570 -0500 SERVER (trace) [7]:1 <- @attach(18) [name="qpid-jms:sender:ID:23d7f58d-9bb1-4a8a-9701-d6eb7f7ec15e:1:1:1:test.Q0", handle=0, role=false, snd-settle-mode=0, rcv-settle-mode=0, source=@source(40) [address="ID:23d7f58d-9cc1-4a8a-9701-d6eb7f7ec15e:1:1:1", durable=0, expiry-policy=:"session-end", timeout=0, dynamic=false, outcomes=@PN_SYMBOL[:"amqp:accepted:list", :"amqp:rejected:list", :"amqp:released:list", :"amqp:modified:list"]], target=@target(41) [address="test.Q0", durable=0, expiry-policy=:"session-end", timeout=0, dynamic=false, capabilities=@PN_SYMBOL[:queue]], incomplete-unsettled=false, initial-delivery-count=0, desired-capabilities=@PN_SYMBOL[:"DELAYED_DELIVERY"]]
+2018-11-18 10:52:52.34008 -0500 SERVER (trace) [255]:2 <- @attach(18) [name="qpid-jms:receiver:ID:c50ab67b-0ff1-41fe-84a6-7a7bace101ec:16263:2:1:some-queue", handle=0, role=true, snd-settle-mode=0, rcv-settle-mode=0, source=@source(40) [address="some-queue", durable=0, expiry-policy=:"link-detach", timeout=0, dynamic=false, default-outcome=@modified(39) [delivery-failed=true], outcomes=@PN_SYMBOL[:"amqp:accepted:list", :"amqp:rejected:list", :"amqp:released:list", :"amqp:modified:list"], capabilities=@PN_SYMBOL[:queue]], target=@target(41) []]
2018-07-20 10:58:40.176528 -0400 SERVER (trace) [2] Connecting to 127.0.0.1:23731 (/home/chug/git/qpid-dispatch/src/server.c:1052)
2018-07-20 10:58:40.176628 -0400 SERVER (trace) [2]: -> SASL (/home/chug/git/qpid-dispatch/src/server.c:106)
2018-07-20 10:58:40.176841 -0400 SERVER (trace) [2]: <- SASL (/home/chug/git/qpid-dispatch/src/server.c:106)