You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by sa...@apache.org on 2017/04/07 19:34:25 UTC
oozie git commit: OOZIE-2457 Oozie log parsing regex consume more
than 90% cpu (satishsaley)
Repository: oozie
Updated Branches:
refs/heads/master cd699bf56 -> 0f27f92d2
OOZIE-2457 Oozie log parsing regex consume more than 90% cpu (satishsaley)
Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/0f27f92d
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/0f27f92d
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/0f27f92d
Branch: refs/heads/master
Commit: 0f27f92d26abfa9b9fa54681ace5486b79e9959e
Parents: cd699bf
Author: satishsaley <sa...@apache.org>
Authored: Fri Apr 7 11:57:44 2017 -0700
Committer: satishsaley <sa...@apache.org>
Committed: Fri Apr 7 11:57:44 2017 -0700
----------------------------------------------------------------------
.../java/org/apache/oozie/util/LogLine.java | 64 ++++++++++++++++
.../util/SimpleTimestampedMessageParser.java | 15 +++-
.../oozie/util/TimestampedMessageParser.java | 81 +++++++++++++-------
.../java/org/apache/oozie/util/XLogFilter.java | 76 ++++++++++++++++--
release-log.txt | 1 +
5 files changed, 200 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/oozie/blob/0f27f92d/core/src/main/java/org/apache/oozie/util/LogLine.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/util/LogLine.java b/core/src/main/java/org/apache/oozie/util/LogLine.java
new file mode 100644
index 0000000..88bf56b
--- /dev/null
+++ b/core/src/main/java/org/apache/oozie/util/LogLine.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oozie.util;
+
+import java.util.ArrayList;
+
+/**
+ * A wrapper over log line to save redundant use of regex
+ */
+public class LogLine {
+ /**
+ * List to hold parts of log message.
+ */
+ private ArrayList<String> logParts;
+ private String line;
+ /**
+ * Boolean to indicate if this log line has matched to the given log pattern
+ */
+ private MATCHED_PATTERN pattern = MATCHED_PATTERN.NONE;
+
+ public ArrayList<String> getLogParts() {
+ return logParts;
+ }
+
+ public void setLogParts(ArrayList<String> logParts) {
+ this.logParts = logParts;
+ }
+
+ public String getLine() {
+ return line;
+ }
+
+ public void setLine(String line) {
+ this.line = line;
+ }
+
+ public MATCHED_PATTERN getMatchedPattern() {
+ return pattern;
+ }
+
+ public void setMatchedPattern(MATCHED_PATTERN pattern) {
+ this.pattern = pattern;
+ }
+
+ enum MATCHED_PATTERN {
+ SPLIT, GENENRIC, NONE;
+ }
+}
http://git-wip-us.apache.org/repos/asf/oozie/blob/0f27f92d/core/src/main/java/org/apache/oozie/util/SimpleTimestampedMessageParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/util/SimpleTimestampedMessageParser.java b/core/src/main/java/org/apache/oozie/util/SimpleTimestampedMessageParser.java
index 78cb042..0b96ad4 100644
--- a/core/src/main/java/org/apache/oozie/util/SimpleTimestampedMessageParser.java
+++ b/core/src/main/java/org/apache/oozie/util/SimpleTimestampedMessageParser.java
@@ -35,11 +35,18 @@ public class SimpleTimestampedMessageParser extends TimestampedMessageParser {
/**
* This implementation simply returns the next line from the Reader.
*
- * @return The next line from the Reader
+ * @return LogLine containing the next line from the Reader
* @throws IOException
*/
@Override
- protected String parseNextLine() throws IOException {
- return reader.readLine();
+ protected LogLine parseNextLogLine() throws IOException {
+ String line = null;
+ if ((line = reader.readLine()) != null) {
+ LogLine logLine = new LogLine();
+ logLine.setLine(line);
+ logLine.setLogParts(splitLogMessage(line));
+ return logLine;
+ }
+ return null;
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/oozie/blob/0f27f92d/core/src/main/java/org/apache/oozie/util/TimestampedMessageParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/util/TimestampedMessageParser.java b/core/src/main/java/org/apache/oozie/util/TimestampedMessageParser.java
index a676f4d..830f287 100644
--- a/core/src/main/java/org/apache/oozie/util/TimestampedMessageParser.java
+++ b/core/src/main/java/org/apache/oozie/util/TimestampedMessageParser.java
@@ -22,8 +22,11 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
+import java.util.regex.Pattern;
+
import org.apache.oozie.service.Services;
import org.apache.oozie.service.XLogStreamingService;
+import org.apache.oozie.util.LogLine.MATCHED_PATTERN;
/**
* Encapsulates the parsing and filtering of the log messages from a BufferedReader. It makes sure not to read in the entire log
@@ -39,13 +42,14 @@ public class TimestampedMessageParser {
static final String SYSTEM_LINE_SEPARATOR = System.getProperty("line.separator");
protected BufferedReader reader;
- private String nextLine = null;
+ private LogLine nextLine = null;
private String lastTimestamp = null;
private XLogFilter filter;
private boolean empty = false;
private String lastMessage = null;
private boolean patternMatched = false;
public int count = 0;
+ private Pattern splitPattern = null;
/**
* Creates a TimestampedMessageParser with the given BufferedReader and filter.
@@ -60,6 +64,8 @@ public class TimestampedMessageParser {
filter = new XLogFilter();
}
filter.constructPattern();
+ String regEx = XLogFilter.PREFIX_REGEX + filter.getFilterPattern().pattern();
+ this.splitPattern = Pattern.compile(regEx);
}
@@ -75,10 +81,10 @@ public class TimestampedMessageParser {
}
StringBuilder message = new StringBuilder();
-
if (nextLine == null) { // first time only
- nextLine = parseNextLine();
- if (nextLine == null) { // reader finished
+ nextLine = parseNextLogLine();
+ if (nextLine == null || nextLine.getLine() == null) {
+ // reader finished
empty = true;
return false;
}
@@ -86,10 +92,11 @@ public class TimestampedMessageParser {
lastTimestamp = parseTimestamp(nextLine);
String nextTimestamp = null;
while (nextTimestamp == null) {
- message.append(nextLine).append(SYSTEM_LINE_SEPARATOR);
- nextLine = parseNextLine();
- if (nextLine != null) {
- nextTimestamp = parseTimestamp(nextLine); // exit loop if we have a timestamp, continue if not
+ message.append(nextLine.getLine()).append(SYSTEM_LINE_SEPARATOR);
+ nextLine = parseNextLogLine();
+ if (nextLine != null && nextLine.getLine() != null) {
+ // exit loop if we have a timestamp, continue if not
+ nextTimestamp = parseTimestamp(nextLine);
}
else { // reader finished
empty = true;
@@ -129,52 +136,59 @@ public class TimestampedMessageParser {
}
/**
- * Reads the next line from the Reader and checks if it matches the filter. It can also handle multi-line messages (i.e.
- * exception stack traces). If it returns null, then there are no lines left in the Reader.
+ * Reads the next line from the Reader and checks if it matches the filter.
+ * It can also handle multi-line messages (i.e. exception stack traces). If
+ * it returns null, then there are no lines left in the Reader.
*
- * @return The next line, or null
+ * @return LogLine
* @throws IOException
*/
- protected String parseNextLine() throws IOException {
+ protected LogLine parseNextLogLine() throws IOException {
String line;
+ LogLine logLine = new LogLine();
while ((line = reader.readLine()) != null) {
- ArrayList<String> logParts = filter.splitLogMessage(line);
- if (logParts != null) {
- patternMatched = filter.matches(logParts);
- }
+ logLine.setLine(line);
+ logLine.setLogParts(null);
+ filter.splitLogMessage(logLine, splitPattern);
+ // check the splits if logLine matches with the splitPattern
+ // Otherwise, go with previous patternMatched value. This is needed
+ // in parsing stack trace
+ patternMatched = logLine.getMatchedPattern() == MATCHED_PATTERN.NONE ? patternMatched
+ : filter.splitsMatches(logLine);
if (patternMatched) {
if (filter.getLogLimit() != -1) {
- if (logParts != null) {
+ if (logLine.getLogParts() != null) {
if (count >= filter.getLogLimit()) {
return null;
}
count++;
}
}
- if (logParts != null) {
+ if (logLine.getLogParts() != null) {
if (filter.getEndDate() != null) {
- //Ignore the milli second part
- if (logParts.get(0).substring(0, 19).compareTo(filter.getFormattedEndDate()) > 0)
+ // Ignore the milli second part
+ if (logLine.getLogParts().get(0).substring(0, 19).compareTo(filter.getFormattedEndDate()) > 0)
return null;
}
}
- return line;
+ return logLine;
}
}
- return line;
+ logLine.setLine(null);
+ return logLine;
}
/**
- * Parses the timestamp out of the passed in line. If there isn't one, it returns null.
+ * Parses the timestamp out of the passed in line. If there isn't one, it
+ * returns null.
*
- * @param line The line to check
+ * @param logLine The LogLine to check
* @return the timestamp of the line, or null
*/
- private String parseTimestamp(String line) {
+ private String parseTimestamp(LogLine logLine) {
String timestamp = null;
- ArrayList<String> logParts = filter.splitLogMessage(line);
- if (logParts != null) {
- timestamp = logParts.get(0);
+ if (logLine != null && logLine.getLogParts() != null && logLine.getLogParts().size() > 0) {
+ timestamp = logLine.getLogParts().get(0);
}
return timestamp;
}
@@ -223,4 +237,15 @@ public class TimestampedMessageParser {
processRemaining(writer, Services.get().get(XLogStreamingService.class).getBufferLen());
}
+ /**
+ * Splits the log message into parts
+ *
+ * @param line
+ * @return List of log parts
+ */
+ protected ArrayList<String> splitLogMessage(String line) {
+ return filter.splitLogMessage(line);
+ }
+
+
}
http://git-wip-us.apache.org/repos/asf/oozie/blob/0f27f92d/core/src/main/java/org/apache/oozie/util/XLogFilter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/util/XLogFilter.java b/core/src/main/java/org/apache/oozie/util/XLogFilter.java
index 3b49f77..cdf172b 100644
--- a/core/src/main/java/org/apache/oozie/util/XLogFilter.java
+++ b/core/src/main/java/org/apache/oozie/util/XLogFilter.java
@@ -24,12 +24,15 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.oozie.service.ConfigurationService;
+import org.apache.oozie.util.LogLine.MATCHED_PATTERN;
+
import com.google.common.annotations.VisibleForTesting;
/**
@@ -61,7 +64,7 @@ public class XLogFilter {
private static final String TIMESTAMP_REGEX = "(\\d\\d\\d\\d-\\d\\d-\\d\\d \\d\\d:\\d\\d:\\d\\d,\\d\\d\\d)";
private static final String WHITE_SPACE_REGEX = "\\s+";
private static final String LOG_LEVEL_REGEX = "(\\w+)";
- private static final String PREFIX_REGEX = TIMESTAMP_REGEX + WHITE_SPACE_REGEX + LOG_LEVEL_REGEX
+ static final String PREFIX_REGEX = TIMESTAMP_REGEX + WHITE_SPACE_REGEX + LOG_LEVEL_REGEX
+ WHITE_SPACE_REGEX;
private static final Pattern SPLITTER_PATTERN = Pattern.compile(PREFIX_REGEX + ALLOW_ALL_REGEX);
@@ -121,6 +124,33 @@ public class XLogFilter {
* @param logParts
* @return
*/
+ public boolean splitsMatches(LogLine logLine) {
+ // Check whether logLine matched with filter
+ if (logLine.getMatchedPattern() != MATCHED_PATTERN.SPLIT) {
+ return false;
+ }
+ ArrayList<String> logParts = logLine.getLogParts();
+ if (getStartDate() != null) {
+ if (logParts.get(0).substring(0, 19).compareTo(getFormattedStartDate()) < 0) {
+ return false;
+ }
+ }
+ String logLevel = logParts.get(1);
+ if (this.logLevels == null || this.logLevels.containsKey(logLevel.toUpperCase(Locale.ENGLISH))) {
+ // line contains the permitted logLevel
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
+
+ /**
+ * Checks if the logLevel and logMessage goes through the logFilter.
+ *
+ * @param logParts
+ * @return
+ */
public boolean matches(ArrayList<String> logParts) {
if (getStartDate() != null) {
if (logParts.get(0).substring(0, 19).compareTo(getFormattedStartDate()) < 0) {
@@ -139,8 +169,9 @@ public class XLogFilter {
}
/**
- * Splits the log line into timestamp, logLevel and remaining log message. Returns array containing timestamp,
- * logLevel, and logMessage if the pattern matches i.e A new log statement, else returns null.
+ * Splits the log line into timestamp, logLevel and remaining log message.
+ * Returns array containing timestamp, logLevel, and logMessage if the
+ * pattern matches i.e A new log statement, else returns null.
*
* @param logLine
* @return Array containing log level and log message
@@ -160,8 +191,39 @@ public class XLogFilter {
}
/**
- * Constructs the regular expression according to the filter and assigns it to fileterPattarn. ".*" will be assigned
- * if no filters are set.
+ * If <code>logLine</code> matches with <code>splitPattern</code>,
+ * <ol>
+ * <li>Split the log line into timestamp, logLevel and remaining log
+ * message.</li>
+ * <li>Record the parts of message in <code>logLine</code> to avoid regex
+ * matching in future.</li>
+ * <li>Record the pattern to which <code>logLine</code> has matched.</li>
+ * </ol>
+ * @param logLine
+ * @param splitPattern
+ */
+ public void splitLogMessage(LogLine logLine, Pattern splitPattern) {
+ Matcher splitterWithJobId = splitPattern.matcher(logLine.getLine());
+ Matcher allowAll = SPLITTER_PATTERN.matcher(logLine.getLine());
+ if (splitterWithJobId.matches()) {
+ ArrayList<String> logParts = new ArrayList<String>(3);
+ logParts.add(splitterWithJobId.group(1));// timestamp
+ logParts.add(splitterWithJobId.group(2));// log level
+ logParts.add(splitterWithJobId.group(3));// log message
+ logLine.setLogParts(logParts);
+ logLine.setMatchedPattern(MATCHED_PATTERN.SPLIT);
+ }
+ else if (allowAll.matches()) {
+ logLine.setMatchedPattern(MATCHED_PATTERN.GENENRIC);
+ }
+ else {
+ logLine.setMatchedPattern(MATCHED_PATTERN.NONE);
+ }
+ }
+
+ /**
+ * Constructs the regular expression according to the filter and assigns it
+ * to fileterPattarn. ".*" will be assigned if no filters are set.
*/
public void constructPattern() {
if (noFilter && logLevels == null) {
@@ -346,4 +408,8 @@ public class XLogFilter {
this.filterPattern = filterPattern;
}
+ public Pattern getFilterPattern() {
+ return this.filterPattern;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/oozie/blob/0f27f92d/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 060e742..cb81d42 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
-- Oozie 4.4.0 release (trunk - unreleased)
+OOZIE-2457 Oozie log parsing regex consume more than 90% cpu (satishsaley)
OOZIE-2844 Increase stability of Oozie actions when log4j.properties is missing or not readable (andras.piros via pbacsko)
OOZIE-2701 Oozie to support Multiple HCatalog URIs (abhishekbafna)
OOZIE-2850 Fix default callback notifications (asasvari via gezapeti)