You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by wu...@apache.org on 2021/03/03 09:19:07 UTC
[skywalking] branch master updated: Enhance the LAL to allow easily
skipping logs with malformed formats (#6477)
This is an automated email from the ASF dual-hosted git repository.
wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git
The following commit(s) were added to refs/heads/master by this push:
new cb16e52 Enhance the LAL to allow easily skipping logs with malformed formats (#6477)
cb16e52 is described below
commit cb16e524da2bf45258e67052443a6b9bc2460855
Author: Zhenxu Ke <ke...@apache.org>
AuthorDate: Wed Mar 3 17:18:50 2021 +0800
Enhance the LAL to allow easily skipping logs with malformed formats (#6477)
---
docs/en/concepts-and-designs/lal.md | 42 +++++++++++++++-------
.../log/analyzer/dsl/spec/filter/FilterSpec.java | 32 +++++++++++------
...JsonParserSpec.java => AbstractParserSpec.java} | 27 +++++++-------
.../analyzer/dsl/spec/parser/JsonParserSpec.java | 11 ++++--
.../analyzer/dsl/spec/parser/TextParserSpec.java | 5 +--
.../analyzer/dsl/spec/parser/YamlParserSpec.java | 3 +-
test/e2e/e2e-test/docker/log/lal.yaml | 1 +
7 files changed, 79 insertions(+), 42 deletions(-)
diff --git a/docs/en/concepts-and-designs/lal.md b/docs/en/concepts-and-designs/lal.md
index 90cad9a..c3f1af4 100644
--- a/docs/en/concepts-and-designs/lal.md
+++ b/docs/en/concepts-and-designs/lal.md
@@ -34,13 +34,7 @@ filter {
if (log.service == "TestingService") { // Don't waste resources on TestingServices
abort {} // all remaining components won't be executed at all
}
- text {
- if (!regexp("(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)")) {
- // if the logs don't match this regexp, skip it
- abort {}
- }
- }
- // ... extractors, sinks
+ // ... parsers, extractors, sinks
}
```
@@ -55,15 +49,35 @@ types of parsers at the moment, namely `json`, `yaml`, and `text`.
When a piece of log is parsed, there is a corresponding property available, called `parsed`, injected by LAL.
Property `parsed` is typically a map, containing all the fields parsed from the raw logs, for example, if the parser
is `json` / `yaml`, `parsed` is a map containing all the key-values in the `json` / `yaml`, if the parser is `text`
-, `parsed` is a map containing all the captured groups and their values (for `regexp` and `grok`). See examples below.
+, `parsed` is a map containing all the captured groups and their values (for `regexp` and `grok`).
+
+All parsers share the following options:
+
+| Option | Type | Description | Default Value |
+| ------ | ---- | ----------- | ------------- |
+| `abortOnFailure` | `boolean` | Whether the filter chain should abort if the parser failed to parse / match the logs | `true` |
+
+See examples below.
#### `json`
-<!-- TODO: is structured in the reported (gRPC) `LogData`, not much to do -->
+```groovy
+filter {
+ json {
+ abortOnFailure true // this is optional because it's default behaviour
+ }
+}
+```
#### `yaml`
-<!-- TODO: is structured in the reported (gRPC) `LogData`, not much to do -->
+```groovy
+filter {
+ yaml {
+ abortOnFailure true // this is optional because it's default behaviour
+ }
+}
+```
#### `text`
@@ -78,8 +92,9 @@ all the captured groups can be used later in the extractors or sinks.
```groovy
filter {
text {
- regexp "(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)"
+ abortOnFailure true // this is optional because it's default behaviour
// this is just a demo pattern
+ regexp "(?<timestamp>\\d{8}) (?<thread>\\w+) (?<level>\\w+) (?<traceId>\\w+) (?<msg>.+)"
}
extractor {
tag level: parsed.level
@@ -91,9 +106,10 @@ filter {
}
```
-- `grok`
+- `grok` (TODO)
-<!-- TODO: grok Java library has poor performance, need to benchmark it, the idea is basically the same with `regexp` above -->
+Because grok Java library has performance issue, we need some investigations and benchmark on it. Contributions are
+welcome.
### Extractor
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java
index 9cfe593..7683bb6 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/filter/FilterSpec.java
@@ -98,11 +98,17 @@ public class FilterSpec extends AbstractSpec {
cl.call();
final LogData.Builder logData = BINDING.get().log();
- final Map<String, Object> parsed = jsonParser.create().fromJson(
- logData.getBody().getJson().getJson(), parsedType
- );
-
- BINDING.get().parsed(parsed);
+ try {
+ final Map<String, Object> parsed = jsonParser.create().fromJson(
+ logData.getBody().getJson().getJson(), parsedType
+ );
+
+ BINDING.get().parsed(parsed);
+ } catch (final Exception e) {
+ if (jsonParser.abortOnFailure()) {
+ BINDING.get().abort();
+ }
+ }
}
@SuppressWarnings({"unused", "unchecked"})
@@ -114,11 +120,17 @@ public class FilterSpec extends AbstractSpec {
cl.call();
final LogData.Builder logData = BINDING.get().log();
- final Map<String, Object> parsed = (Map<String, Object>) yamlParser.create().load(
- logData.getBody().getYaml().getYaml()
- );
-
- BINDING.get().parsed(parsed);
+ try {
+ final Map<String, Object> parsed = (Map<String, Object>) yamlParser.create().load(
+ logData.getBody().getYaml().getYaml()
+ );
+
+ BINDING.get().parsed(parsed);
+ } catch (final Exception e) {
+ if (yamlParser.abortOnFailure()) {
+ BINDING.get().abort();
+ }
+ }
}
@SuppressWarnings("unused")
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/AbstractParserSpec.java
similarity index 66%
copy from oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
copy to oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/AbstractParserSpec.java
index 6bb716d..e410cbe 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/AbstractParserSpec.java
@@ -18,23 +18,26 @@
package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
+import lombok.Getter;
+import lombok.Setter;
+import lombok.experimental.Accessors;
import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager;
-public class JsonParserSpec extends AbstractSpec {
- private final GsonBuilder gsonBuilder;
+@Accessors(fluent = true)
+public class AbstractParserSpec extends AbstractSpec {
+ /**
+ * Whether the filter chain should abort when parsing the logs failed.
+ *
+ * Failing to parse the logs means either parsing throws exceptions or the logs not matching the desired patterns.
+ */
+ @Getter
+ @Setter
+ private boolean abortOnFailure = true;
- public JsonParserSpec(final ModuleManager moduleManager,
- final LogAnalyzerModuleConfig moduleConfig) {
+ public AbstractParserSpec(final ModuleManager moduleManager,
+ final LogAnalyzerModuleConfig moduleConfig) {
super(moduleManager, moduleConfig);
-
- gsonBuilder = new GsonBuilder();
- }
-
- public Gson create() {
- return gsonBuilder.create();
}
}
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
index 6bb716d..850daab 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/JsonParserSpec.java
@@ -20,21 +20,26 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
-import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager;
-public class JsonParserSpec extends AbstractSpec {
+public class JsonParserSpec extends AbstractParserSpec {
private final GsonBuilder gsonBuilder;
+ private final Gson gson;
+
public JsonParserSpec(final ModuleManager moduleManager,
final LogAnalyzerModuleConfig moduleConfig) {
super(moduleManager, moduleConfig);
gsonBuilder = new GsonBuilder();
+
+ // We just create a gson instance in advance for now (for the sake of performance),
+ // when we want to provide some extra options, we'll move this into method "create" then.
+ gson = gsonBuilder.create();
}
public Gson create() {
- return gsonBuilder.create();
+ return gson;
}
}
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java
index 0ffccab..c77f639 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/TextParserSpec.java
@@ -21,11 +21,10 @@ package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.skywalking.apm.network.logging.v3.LogData;
-import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager;
-public class TextParserSpec extends AbstractSpec {
+public class TextParserSpec extends AbstractParserSpec {
public TextParserSpec(final ModuleManager moduleManager,
final LogAnalyzerModuleConfig moduleConfig) {
super(moduleManager, moduleConfig);
@@ -45,6 +44,8 @@ public class TextParserSpec extends AbstractSpec {
final boolean matched = matcher.find();
if (matched) {
BINDING.get().parsed(matcher);
+ } else if (abortOnFailure()) {
+ BINDING.get().abort();
}
return matched;
}
diff --git a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java
index 2bb22b9..bbd4dc4 100644
--- a/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java
+++ b/oap-server/analyzer/log-analyzer/src/main/java/org/apache/skywalking/oap/log/analyzer/dsl/spec/parser/YamlParserSpec.java
@@ -18,7 +18,6 @@
package org.apache.skywalking.oap.log.analyzer.dsl.spec.parser;
-import org.apache.skywalking.oap.log.analyzer.dsl.spec.AbstractSpec;
import org.apache.skywalking.oap.log.analyzer.provider.LogAnalyzerModuleConfig;
import org.apache.skywalking.oap.server.library.module.ModuleManager;
import org.yaml.snakeyaml.DumperOptions;
@@ -27,7 +26,7 @@ import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.SafeConstructor;
import org.yaml.snakeyaml.representer.Representer;
-public class YamlParserSpec extends AbstractSpec {
+public class YamlParserSpec extends AbstractParserSpec {
private final LoaderOptions loaderOptions;
public YamlParserSpec(final ModuleManager moduleManager,
diff --git a/test/e2e/e2e-test/docker/log/lal.yaml b/test/e2e/e2e-test/docker/log/lal.yaml
index d0d6a19..2b94988 100644
--- a/test/e2e/e2e-test/docker/log/lal.yaml
+++ b/test/e2e/e2e-test/docker/log/lal.yaml
@@ -18,6 +18,7 @@ rules:
dsl: |
filter {
text {
+ abortOnFailure false // for test purpose, we want to persist all logs
regexp $/(?s)(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3}) \[TID:(?<tid>.+?)] \[(?<thread>.+?)] (?<level>\w{4,}) (?<logger>.{1,36}) (?<msg>.+)/$
}
extractor {