You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by ke...@apache.org on 2020/12/09 17:00:20 UTC

[skywalking-python] branch master updated: [Enhancement] optimized path trace ignore (#96)

This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking-python.git


The following commit(s) were added to refs/heads/master by this push:
     new eca37b8  [Enhancement] optimized path trace ignore (#96)
eca37b8 is described below

commit eca37b82e5f3f5aada74053bbe1cbfb618cfd0d6
Author: Tomasz Pytel <to...@gmail.com>
AuthorDate: Wed Dec 9 14:00:09 2020 -0300

    [Enhancement] optimized path trace ignore (#96)
---
 skywalking/agent/__init__.py    |  1 +
 skywalking/config.py            | 28 ++++++++++++++---
 skywalking/decorators.py        | 20 ++----------
 skywalking/trace/context.py     | 11 +------
 skywalking/utils/ant_matcher.py | 39 -----------------------
 tests/test_ant_matcher.py       | 69 +++++++++++++++++++++++------------------
 6 files changed, 67 insertions(+), 101 deletions(-)

diff --git a/skywalking/agent/__init__.py b/skywalking/agent/__init__.py
index b6a3046..3a6b001 100644
--- a/skywalking/agent/__init__.py
+++ b/skywalking/agent/__init__.py
@@ -72,6 +72,7 @@ def start():
         raise RuntimeError('the agent can only be started once')
     from skywalking import loggings
     loggings.init()
+    config.finalize()
     __started = True
     __init()
     __heartbeat_thread.start()
diff --git a/skywalking/config.py b/skywalking/config.py
index 204d277..e4c4cbe 100644
--- a/skywalking/config.py
+++ b/skywalking/config.py
@@ -16,12 +16,15 @@
 #
 import inspect
 import os
+import re
 import uuid
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from typing import List
 
+RE_IGNORE_PATH = re.compile('^$')  # type: 're.Pattern'
+
 service_name = os.getenv('SW_AGENT_NAME') or 'Python Service Name'  # type: str
 service_instance = os.getenv('SW_AGENT_INSTANCE') or str(uuid.uuid1()).replace('-', '')  # type: str
 collector_address = os.getenv('SW_AGENT_COLLECTOR_BACKEND_SERVICES') or '127.0.0.1:11800'  # type: str
@@ -46,9 +49,7 @@ django_collect_http_params = True if os.getenv('SW_DJANGO_COLLECT_HTTP_PARAMS')
                                      os.getenv('SW_DJANGO_COLLECT_HTTP_PARAMS') == 'True' else False  # type: bool
 correlation_element_max_number = int(os.getenv('SW_CORRELATION_ELEMENT_MAX_NUMBER') or '3')  # type: int
 correlation_value_max_length = int(os.getenv('SW_CORRELATION_VALUE_MAX_LENGTH') or '128')  # type: int
-trace_ignore = True if os.getenv('SW_TRACE_IGNORE') and \
-                       os.getenv('SW_TRACE_IGNORE') == 'True' else False  # type: bool
-trace_ignore_path = [s.strip() for s in (os.getenv('SW_TRACE_IGNORE_PATH') or '').split(',')]  # type: List[str]
+trace_ignore_path = os.getenv('SW_TRACE_IGNORE_PATH') or ''  # type: str
 elasticsearch_trace_dsl = True if os.getenv('SW_ELASTICSEARCH_TRACE_DSL') and \
                                   os.getenv('SW_ELASTICSEARCH_TRACE_DSL') == 'True' else False  # type: bool
 kafka_bootstrap_servers = os.getenv('SW_KAFKA_REPORTER_BOOTSTRAP_SERVERS') or "localhost:9092"  # type: str
@@ -79,11 +80,29 @@ def init(
     authentication = token or authentication
 
 
+def finalize():
+    reesc = re.compile(r'([.*+?^=!:${}()|\[\]\\])')
+    suffix = r'^.+(?:' + '|'.join(reesc.sub(r'\\\1', s.strip()) for s in ignore_suffix.split(',')) + ')$'
+    path = '^(?:' + \
+           '|'.join(                          # replaces ","
+               '(?:(?:[^/]+/)*[^/]+)?'.join(  # replaces "**"
+                   '[^/]*'.join(              # replaces "*"
+                       '[^/]'.join(           # replaces "?"
+                           reesc.sub(r'\\\1', s) for s in p2.split('?')
+                       ) for p2 in p1.split('*')
+                   ) for p1 in p0.strip().split('**')
+               ) for p0 in trace_ignore_path.split(',')
+           ) + ')$'
+
+    global RE_IGNORE_PATH
+    RE_IGNORE_PATH = re.compile('%s|%s' % (suffix, path))
+
+
 def serialize():
     from skywalking import config
     return {
         key: value for key, value in config.__dict__.items() if not (
-                key.startswith('_') or key == 'TYPE_CHECKING'
+                key.startswith('_') or key == 'TYPE_CHECKING' or key == 'RE_IGNORE_PATH'
                 or inspect.isfunction(value)
                 or inspect.ismodule(value)
                 or inspect.isbuiltin(value)
@@ -97,3 +116,4 @@ def deserialize(data):
     for key, value in data.items():
         if key in config.__dict__:
             config.__dict__[key] = value
+    finalize()
diff --git a/skywalking/decorators.py b/skywalking/decorators.py
index 444c7a5..81c4893 100644
--- a/skywalking/decorators.py
+++ b/skywalking/decorators.py
@@ -40,12 +40,7 @@ def trace(
                     span.layer = layer
                     span.component = component
                     [span.tag(tag) for tag in tags or []]
-                    try:
-                        result = func(*args, **kwargs)
-                        return await result
-                    except Exception:
-                        span.raised()
-                        raise
+                    return await func(*args, **kwargs)
             return wrapper
 
         else:
@@ -55,12 +50,7 @@ def trace(
                     span.layer = layer
                     span.component = component
                     [span.tag(tag) for tag in tags or []]
-                    try:
-                        result = func(*args, **kwargs)
-                        return result
-                    except Exception:
-                        span.raised()
-                        raise
+                    return func(*args, **kwargs)
             return wrapper
 
     return decorator
@@ -84,11 +74,7 @@ def runnable(
                 span.layer = layer
                 span.component = component
                 [span.tag(tag) for tag in tags or []]
-                try:
-                    func(*args, **kwargs)
-                except Exception:
-                    span.raised()
-                    raise
+                func(*args, **kwargs)
 
         return wrapper
 
diff --git a/skywalking/trace/context.py b/skywalking/trace/context.py
index af7d9fa..51dffa3 100644
--- a/skywalking/trace/context.py
+++ b/skywalking/trace/context.py
@@ -21,7 +21,6 @@ from skywalking.trace.carrier import Carrier
 from skywalking.trace.segment import Segment, SegmentRef
 from skywalking.trace.snapshot import Snapshot
 from skywalking.trace.span import Span, Kind, NoopSpan, EntrySpan, ExitSpan
-from skywalking.utils.ant_matcher import fast_path_match
 from skywalking.utils.counter import Counter
 
 
@@ -131,19 +130,11 @@ class SpanContext(object):
         return span
 
     def ignore_check(self, op: str, kind: Kind):
-        suffix_idx = op.rfind(".")
-        if suffix_idx > -1 and config.ignore_suffix.find(op[suffix_idx:]) > -1:
+        if config.RE_IGNORE_PATH.match(op):
             return NoopSpan(
                 context=NoopContext(),
                 kind=kind,
             )
-        if config.trace_ignore:
-            for pattern in config.trace_ignore_path:
-                if fast_path_match(pattern, op):
-                    return NoopSpan(
-                        context=NoopContext(),
-                        kind=kind,
-                    )
 
         return None
 
diff --git a/skywalking/utils/ant_matcher.py b/skywalking/utils/ant_matcher.py
deleted file mode 100644
index 382a1a5..0000000
--- a/skywalking/utils/ant_matcher.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-import re
-
-reesc = re.compile(r'([.*+?^=!:${}()|\[\]\\])')
-recache = {}
-
-
-def fast_path_match(pattern: str, path: str):
-    repat = recache.get(pattern)
-
-    if repat is None:
-        repat = recache[pattern] = \
-            re.compile('^(?:' +                       # this could handle multiple patterns in one by joining with '|'
-                       '(?:(?:[^/]+/)*[^/]+)?'.join(  # replaces "**"
-                           '[^/]*'.join(              # replaces "*"
-                               '[^/]'.join(           # replaces "?"
-                                   reesc.sub(r'\\\1', s) for s in p2.split('?')
-                               ) for p2 in p1.split('*')
-                           ) for p1 in pattern.split('**')
-                       ) + ')$')
-
-    return bool(repat.match(path))
diff --git a/tests/test_ant_matcher.py b/tests/test_ant_matcher.py
index de64c4d..a5e1c1b 100644
--- a/tests/test_ant_matcher.py
+++ b/tests/test_ant_matcher.py
@@ -16,70 +16,77 @@
 #
 import unittest
 
-from skywalking.utils.ant_matcher import fast_path_match
+from skywalking import config
+
+
+def fast_path_match(pattern, path):
+    config.trace_ignore_path = pattern
+    config.finalize()
+
+    return config.RE_IGNORE_PATH.match(path)
 
 
 class TestFastPathMatch(unittest.TestCase):
     def test_match(self):
-        patten = "/eureka/*"
+        pattern = "/eureka/*"
         path = "/eureka/apps"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "/eureka/"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "/eureka/apps/"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
 
-        patten = "/eureka/*/"
+        pattern = "/eureka/*/"
         path = "/eureka/apps/"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "/eureka/"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
         path = "/eureka/apps/list"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
 
-        patten = "/eureka/**"
+        pattern = "/eureka/**"
         path = "/eureka/"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "/eureka/apps/test"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "/eureka/apps/test/"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
 
-        patten = "eureka/apps/?"
+        pattern = "eureka/apps/?"
         path = "eureka/apps/list"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
         path = "eureka/apps/"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
         path = "eureka/apps/a"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
 
-        patten = "eureka/**/lists"
+        pattern = "eureka/**/lists"
         path = "eureka/apps/lists"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "eureka/apps/test/lists"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "eureka/apps/test/"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
         path = "eureka/apps/test"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
 
-        patten = "eureka/**/test/**"
+        pattern = "eureka/**/test/**"
         path = "eureka/apps/test/list"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "eureka/apps/foo/test/list/bar"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "eureka/apps/foo/test/list/bar/"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
         path = "eureka/apps/test/list"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "eureka/test/list"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
 
-        patten = "/eureka/**/b/**/*.txt"
+        pattern = "/eureka/**/b/**/*.txt"
         path = "/eureka/a/aa/aaa/b/bb/bbb/xxxxxx.txt"
-        self.assertTrue(fast_path_match(patten, path))
+        self.assertTrue(fast_path_match(pattern, path))
         path = "/eureka/a/aa/aaa/b/bb/bbb/xxxxxx"
-        self.assertFalse(fast_path_match(patten, path))
+        self.assertFalse(fast_path_match(pattern, path))
 
 
 if __name__ == '__main__':