You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by wu...@apache.org on 2021/03/03 09:19:07 UTC

[skywalking] branch master updated: Enhance the LAL to allow easily skipping logs with malformed formats (#6477)

This is an automated email from the ASF dual-hosted git repository.

wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git


The following commit(s) were added to refs/heads/master by this push:
     new cb16e52  Enhance the LAL to allow easily skipping logs with malformed formats (#6477)
cb16e52 is described below

commit cb16e524da2bf45258e67052443a6b9bc2460855
Author: Zhenxu Ke <ke...@apache.org>
AuthorDate: Wed Mar 3 17:18:50 2021 +0800

    Enhance the LAL to allow easily skipping logs with malformed formats (#6477)
---
 docs/en/concepts-and-designs/lal.md                | 42 +++++++++++++++-------
 .../log/analyzer/dsl/spec/filter/FilterSpec.java   | 32 +++++++++++------
 ...JsonParserSpec.java => AbstractParserSpec.java} | 27 +++++++-------
 .../analyzer/dsl/spec/parser/JsonParserSpec.java   | 11 ++++--
 .../analyzer/dsl/spec/parser/TextParserSpec.java   |  5 +--
 .../analyzer/dsl/spec/parser/YamlParserSpec.java   |  3 +-
 test/e2e/e2e-test/docker/log/lal.yaml              |  1 +
 7 files changed, 79 insertions(+), 42 deletions(-)

diff --git a/docs/en/concepts-and-designs/lal.md b/docs/en/concepts-and-designs/lal.md
index 90cad9a..c3f1af4 100644
--- a/docs/en/concepts-and-designs/lal.md
+++ b/docs/en/concepts-and-designs/lal.md
@@ -34,13 +34,7 @@ filter {
     if (log.service == "TestingService") { // Don't waste resources on TestingServices
         abort {} // all remaining components won't be executed at all
     }
-    text {
-        if (!regexp("(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)")) {
-            // if the logs don't match this regexp, skip it
-            abort {}
-        }
-    }
-    // ... extractors, sinks
+    // ... parsers, extractors, sinks
 }
 ```
 
@@ -55,15 +49,35 @@ types of parsers at the moment, namely `json`, `yaml`, and `text`.
 When a piece of log is parsed, there is a corresponding property available, called `parsed`, injected by LAL.
 Property `parsed` is typically a map, containing all the fields parsed from the raw logs, for example, if the parser
 is `json` / `yaml`, `parsed` is a map containing all the key-values in the `json` / `yaml`, if the parser is `text`
-, `parsed` is a map containing all the captured groups and their values (for `regexp` and `grok`). See examples below.
+, `parsed` is a map containing all the captured groups and their values (for `regexp` and `grok`).
+
+All parsers share the following options:
+
+| Option | Type | Description | Default Value |
+| ------ | ---- | ----------- | ------------- |
+| `abortOnFailure` | `boolean` | Whether the filter chain should abort if the parser failed to parse / match the logs | `true` |
+
+See examples below.
 
 #### `json`
 
-<!-- TODO: is structured in the reported (gRPC) `LogData`, not much to do -->
+```groovy
+filter {
+    json {
+        abortOnFailure true // this is optional because it's default behaviour
+    }
+}
+```
 
 #### `yaml`
 
-<!-- TODO: is structured in the reported (gRPC) `LogData`, not much to do -->
+```groovy
+filter {
+    yaml {
+        abortOnFailure true // this is optional because it's default behaviour
+    }
+}
+```
 
 #### `text`
 
@@ -78,8 +92,9 @@ all the captured groups can be used later in the extractors or sinks.
 ```groovy
 filter {
     text {
-        regexp "(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)"
+        abortOnFailure true // this is optional because it's default behaviour
         // this is just a demo pattern
+        regexp "(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)"
     }
     extractor {
         tag level: parsed.level
@@ -91,9 +106,10 @@ filter {
 }
 ```
 
-- `grok`
+- `grok` (TODO)
 
-<!-- TODO: grok Java library has poor performance, need to benchmark it, the idea is basically the same with `regexp` above -->
+Because grok Java library has performance issue, we need some investigations and benchmark on it. Contributions are
+welcome.
 
 ### Extractor
 
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java
index 9cfe593..7683bb6 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java
@@ -98,11 +98,17 @@ public class FilterSpec extends AbstractSpec {
         cl.call();
 
         final LogData.Builder logData = BINDING.get().log();
-        final Map<String, Object> parsed = jsonParser.create().fromJson(
-            logData.getBody().getJson().getJson(), parsedType
-        );
-
-        BINDING.get().parsed(parsed);
+        try {
+            final Map<String, Object> parsed = jsonParser.create().fromJson(
+                logData.getBody().getJson().getJson(), parsedType
+            );
+
+            BINDING.get().parsed(parsed);
+        } catch (final Exception e) {
+            if (jsonParser.abortOnFailure()) {
+                BINDING.get().abort();
+            }
+        }
     }
 
     @SuppressWarnings({"unused", "unchecked"})
@@ -114,11 +120,17 @@ public class FilterSpec extends AbstractSpec {
         cl.call();
 
         final LogData.Builder logData = BINDING.get().log();
-        final Map<String, Object> parsed = (Map<String, Object>) yamlParser.create().load(
-            logData.getBody().getYaml().getYaml()
-        );
-
-        BINDING.get().parsed(parsed);
+        try {
+            final Map<String, Object> parsed = (Map<String, Object>) yamlParser.create().load(
+                logData.getBody().getYaml().getYaml()
+            );
+
+            BINDING.get().parsed(parsed);
+        } catch (final Exception e) {
+            if (yamlParser.abortOnFailure()) {
+                BINDING.get().abort();
+            }
+        }
     }
 
     @SuppressWarnings("unused")
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/AbstractParserSpec.java
similarity index 66%
copy from oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
copy to oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/AbstractParserSpec.java
index 6bb716d..e410cbe 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/AbstractParserSpec.java
@@ -18,23 +18,26 @@
 
 package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
 
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.experimental.Accessors;
 import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
 import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
 import org.apache.skywalking.oap.server.library.module.ModuleManager;
 
-public class JsonParserSpec extends AbstractSpec {
-    private final GsonBuilder gsonBuilder;
+@Accessors(fluent = true)
+public class AbstractParserSpec extends AbstractSpec {
+    /**
+     * Whether the filter chain should abort when parsing the logs failed.
+     *
+     * Failing to parse the logs means either parsing throws exceptions or the logs not matching the desired patterns.
+     */
+    @Getter
+    @Setter
+    private boolean abortOnFailure = true;
 
-    public JsonParserSpec(final ModuleManager moduleManager,
-                          final LogAnalyzerModuleConfig moduleConfig) {
+    public AbstractParserSpec(final ModuleManager moduleManager,
+                              final LogAnalyzerModuleConfig moduleConfig) {
         super(moduleManager, moduleConfig);
-
-        gsonBuilder = new GsonBuilder();
-    }
-
-    public Gson create() {
-        return gsonBuilder.create();
     }
 }
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
index 6bb716d..850daab 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
@@ -20,21 +20,26 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
 
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
-import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
 import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
 import org.apache.skywalking.oap.server.library.module.ModuleManager;
 
-public class JsonParserSpec extends AbstractSpec {
+public class JsonParserSpec extends AbstractParserSpec {
     private final GsonBuilder gsonBuilder;
 
+    private final Gson gson;
+
     public JsonParserSpec(final ModuleManager moduleManager,
                           final LogAnalyzerModuleConfig moduleConfig) {
         super(moduleManager, moduleConfig);
 
         gsonBuilder = new GsonBuilder();
+
+        // We just create a gson instance in advance for now (for the sake of performance),
+        // when we want to provide some extra options, we'll move this into method "create" then.
+        gson = gsonBuilder.create();
     }
 
     public Gson create() {
-        return gsonBuilder.create();
+        return gson;
     }
 }
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java
index 0ffccab..c77f639 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java
@@ -21,11 +21,10 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.skywalking.apm.network.logging.v3.LogData;
-import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
 import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
 import org.apache.skywalking.oap.server.library.module.ModuleManager;
 
-public class TextParserSpec extends AbstractSpec {
+public class TextParserSpec extends AbstractParserSpec {
     public TextParserSpec(final ModuleManager moduleManager,
                           final LogAnalyzerModuleConfig moduleConfig) {
         super(moduleManager, moduleConfig);
@@ -45,6 +44,8 @@ public class TextParserSpec extends AbstractSpec {
         final boolean matched = matcher.find();
         if (matched) {
             BINDING.get().parsed(matcher);
+        } else if (abortOnFailure()) {
+            BINDING.get().abort();
         }
         return matched;
     }
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java
index 2bb22b9..bbd4dc4 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java
@@ -18,7 +18,6 @@
 
 package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
 
-import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
 import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
 import org.apache.skywalking.oap.server.library.module.ModuleManager;
 import org.yaml.snakeyaml.DumperOptions;
@@ -27,7 +26,7 @@ import org.yaml.snakeyaml.Yaml;
 import org.yaml.snakeyaml.constructor.SafeConstructor;
 import org.yaml.snakeyaml.representer.Representer;
 
-public class YamlParserSpec extends AbstractSpec {
+public class YamlParserSpec extends AbstractParserSpec {
     private final LoaderOptions loaderOptions;
 
     public YamlParserSpec(final ModuleManager moduleManager,
diff --git a/test/e2e/e2e-test/docker/log/lal.yaml b/test/e2e/e2e-test/docker/log/lal.yaml
index d0d6a19..2b94988 100644
--- a/test/e2e/e2e-test/docker/log/lal.yaml
+++ b/test/e2e/e2e-test/docker/log/lal.yaml
@@ -18,6 +18,7 @@ rules:
     dsl: |
       filter {
         text {
+          abortOnFailure false // for test purpose, we want to persist all logs
           regexp $/(?s)(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3}) \[TID:(?<tid>.+?)] \[(?<thread>.+?)] (?<level>\w{4,}) (?<logger>.{1,36}) (?<msg>.+)/$
         }
         extractor {