You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@chukwa.apache.org by as...@apache.org on 2009/08/04 23:10:57 UTC
svn commit: r800976 - in /hadoop/chukwa/trunk: ./
src/docs/src/documentation/content/xdocs/ src/java/org/apache/hadoop/chukwa/
src/java/org/apache/hadoop/chukwa/datacollection/writer/
src/java/org/apache/hadoop/chukwa/extraction/engine/ src/java/org/ap...
Author: asrabkin
Date: Tue Aug 4 21:10:56 2009
New Revision: 800976
URL: http://svn.apache.org/viewvc?rev=800976&view=rev
Log:
CHUKWA-366. Filter by custom tags
Added:
hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/Filter.java
hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestFilter.java
Modified:
hadoop/chukwa/trunk/CHANGES.txt
hadoop/chukwa/trunk/src/docs/src/documentation/content/xdocs/programming.xml
hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/Chunk.java
hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/ChunkImpl.java
hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/datacollection/writer/SocketTeeWriter.java
hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/engine/RecordUtil.java
hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/DumpChunks.java
hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/ChunkImplTest.java
hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestDumpChunks.java
Modified: hadoop/chukwa/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/CHANGES.txt?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/CHANGES.txt (original)
+++ hadoop/chukwa/trunk/CHANGES.txt Tue Aug 4 21:10:56 2009
@@ -4,6 +4,8 @@
NEW FEATURES
+ CHUKWA-366. Custom tags. (asrabkin)
+
CHUKWA-358. Real-time monitoring at collector. (asrabkin)
CHUKWA-352. Xtrace in contrib. (asrabkin)
Modified: hadoop/chukwa/trunk/src/docs/src/documentation/content/xdocs/programming.xml
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/docs/src/documentation/content/xdocs/programming.xml?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/docs/src/documentation/content/xdocs/programming.xml (original)
+++ hadoop/chukwa/trunk/src/docs/src/documentation/content/xdocs/programming.xml Tue Aug 4 21:10:56 2009
@@ -76,9 +76,16 @@
<code>metadatafield</code> is one of the Chukwa metadata fields, and
<code>regex</code> is a regular expression. The valid metadata field names are:
<code>datatype</code>, <code>host</code>, <code>cluster</code>,
-<code>content</code> and <code>name</code>.
+<code>content</code>, and <code>name</code>. Note that the <code>name</code> field matches the stream name -- often the filename
+that the data was extracted from.
</p>
+<p>
+In addition, you can match arbitrary tags via <code>tags.tagname</code>.
+So for instance, to match chunks with tag <code>foo="bar"</code> you could say
+<code>tags.foo=bar</code>. Note that quotes are present in the tag, but not
+in the filter rule.</p>
+
<p>A stream matches the search pattern only if every rule matches. So to
retrieve HadoopLog data from cluster foo, you might search for
<code>cluster=foo&datatype=HadoopLog</code>.
Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/Chunk.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/Chunk.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/Chunk.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/Chunk.java Tue Aug 4 21:10:56 2009
@@ -118,6 +118,13 @@
*/
public void addTag(String tag);
+ /**
+ * Returns the value of a single tag, assuming tags are of the form
+ * tagname="val".
+ * @param tagName the name of the tag whose value is wanted
+ * @return the text between the quotes, or null if the tag is not matched.
+ */
+ public String getTag(String tagName);
public void write(DataOutput data) throws IOException;
}
Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/ChunkImpl.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/ChunkImpl.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/ChunkImpl.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/ChunkImpl.java Tue Aug 4 21:10:56 2009
@@ -24,6 +24,8 @@
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.hadoop.chukwa.datacollection.DataFactory;
import org.apache.hadoop.chukwa.datacollection.adaptor.Adaptor;
@@ -185,7 +187,21 @@
public String getTags() {
return tags;
}
-
+
+ /**
+  * Returns the value of the tag named exactly tagName: the name is taken literally
+  * (never as a regex) and whole ("oo" is not found in foo="x"); the value ends at its closing quote.
+  * @return the unquoted value, or null if tags are unset or hold no such tag
+  * @see org.apache.hadoop.chukwa.Chunk#getTag(java.lang.String)
+  */
+ public String getTag(String tagName) {
+   if (tags == null)
+     return null;
+   String regex = "(?<!\\w)" + Pattern.quote(tagName) + "=\"([^\"]*)\"";
+   Matcher matcher = Pattern.compile(regex).matcher(tags);
+   return matcher.find() ? matcher.group(1) : null;
+ }
+
/**
* @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
*/
Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/datacollection/writer/SocketTeeWriter.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/datacollection/writer/SocketTeeWriter.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/datacollection/writer/SocketTeeWriter.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/datacollection/writer/SocketTeeWriter.java Tue Aug 4 21:10:56 2009
@@ -23,7 +23,7 @@
import java.util.regex.PatternSyntaxException;
import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.util.DumpChunks;
-import static org.apache.hadoop.chukwa.util.DumpChunks.Filter;
+import org.apache.hadoop.chukwa.util.Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
import org.mortbay.log.Log;
@@ -114,7 +114,7 @@
Socket sock;
BufferedReader in;
DataOutputStream out;
- DumpChunks.Filter rules;
+ Filter rules;
boolean sendRawBytes;
final BlockingQueue<Chunk> sendQ;
public Tee(Socket s) throws IOException {
@@ -174,7 +174,7 @@
}
String cmdAfterSpace = cmd.substring(cmd.indexOf(' ')+1);
- rules = new DumpChunks.Filter(cmdAfterSpace);
+ rules = new Filter(cmdAfterSpace);
out = new DataOutputStream(sock.getOutputStream());
//now that we read everything OK we can add ourselves to list, and return.
Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/engine/RecordUtil.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/engine/RecordUtil.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/engine/RecordUtil.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/engine/RecordUtil.java Tue Aug 4 21:10:56 2009
@@ -6,6 +6,10 @@
import org.apache.hadoop.chukwa.Chunk;
+/**
+ * Various utility methods.
+ *
+ */
public class RecordUtil {
static Pattern clusterPattern = Pattern
.compile("(.*)?cluster=\"(.*?)\"(.*)?");
@@ -21,6 +25,11 @@
return "undefined";
}
+ /**
+ * Uses a precompiled pattern, so theoretically faster than
+ * Chunk.getTag().
+ *
+ */
public static String getClusterName(Chunk chunk) {
String tags = chunk.getTags();
if (tags != null) {
@@ -32,4 +41,6 @@
return "undefined";
}
+
+
}
Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/DumpChunks.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/DumpChunks.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/DumpChunks.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/DumpChunks.java Tue Aug 4 21:10:56 2009
@@ -36,94 +36,7 @@
public class DumpChunks {
- private static class SearchRule {
- Pattern p;
- String targ;
-
- SearchRule(Pattern p, String t) {
- this.p = p;
- this.targ = t;
- }
-
- boolean matches(Chunk chunk) {
- if(targ.equals("datatype")) {
- return p.matcher(chunk.getDataType()).matches();
- } else if(targ.equals("name")) {
- return p.matcher(chunk.getStreamName()).matches();
- } else if(targ.equals("host")) {
- return p.matcher(chunk.getSource()).matches();
- } else if(targ.equals("cluster")) {
- String cluster = RecordUtil.getClusterName(chunk);
- return p.matcher(cluster).matches();
- } else if(targ.equals("content")) {
- String content = new String(chunk.getData());
- Matcher m = p.matcher(content);
- return m.matches();
- }
- else {
- assert false: "unknown target: " +targ;
- return false;
- }
- }
-
- public String toString() {
- return targ + "=" +p.toString();
- }
-
- }
- public static class Filter {
- List<SearchRule> compiledPatterns;
-
- public Filter(String listOfPatterns) throws PatternSyntaxException{
- compiledPatterns = new ArrayList<SearchRule>();
- //FIXME: could escape these
- String[] patterns = listOfPatterns.split(SEPARATOR);
- for(String p: patterns) {
- int equalsPos = p.indexOf('=');
-
- if(equalsPos < 0 || equalsPos > (p.length() -2)) {
- throw new PatternSyntaxException(
- "pattern must be of form targ=pattern", p, -1);
- }
-
- String targ = p.substring(0, equalsPos);
- if(!ArrayUtils.contains(SEARCH_TARGS, targ)) {
- throw new PatternSyntaxException(
- "pattern doesn't start with recognized search target", p, -1);
- }
-
- Pattern pat = Pattern.compile(p.substring(equalsPos+1), Pattern.DOTALL);
- compiledPatterns.add(new SearchRule(pat, targ));
- }
- }
-
- public boolean matches(Chunk chunk) {
- for(SearchRule r: compiledPatterns) {
- if(!r.matches(chunk))
- return false;
- }
- return true;
- }
-
- public int size() {
- return compiledPatterns.size();
- }
-
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(compiledPatterns.get(0));
- for(int i=1; i < compiledPatterns.size(); ++i) {
- sb.append(" & ");
- sb.append(compiledPatterns.get(i));
- }
- return sb.toString();
- }
- }//end class
-
- static final String[] SEARCH_TARGS = {"datatype", "name", "host", "cluster", "content"};
-
- static final String SEPARATOR="&";
/**
* Tries to find chunks matching a given pattern.
* Takes as input a set of &-delimited patterns, followed
Added: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/Filter.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/Filter.java?rev=800976&view=auto
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/Filter.java (added)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/Filter.java Tue Aug 4 21:10:56 2009
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.chukwa.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.chukwa.Chunk;
+import org.apache.hadoop.chukwa.extraction.engine.RecordUtil;
+
+
+public class Filter {
+
+ private static final String[] SEARCH_TARGS =
+ {"datatype", "name", "host", "cluster", "content"};
+ static final String SEPARATOR="&";
+
+ private static class SearchRule {
+ Pattern p;
+ String targ;
+
+ SearchRule(Pattern p, String t) {
+ this.p = p;
+ this.targ = t;
+ }
+
+ boolean matches(Chunk chunk) {
+ if(targ.equals("datatype")) {
+ return p.matcher(chunk.getDataType()).matches();
+ } else if(targ.equals("name")) {
+ return p.matcher(chunk.getStreamName()).matches();
+ } else if(targ.equals("host")) {
+ return p.matcher(chunk.getSource()).matches();
+ } else if(targ.equals("cluster")) {
+ String cluster = RecordUtil.getClusterName(chunk);
+ return p.matcher(cluster).matches();
+ } else if(targ.equals("content")) {
+ String content = new String(chunk.getData());
+ return p.matcher(content).matches();
+ } else if(targ.startsWith("tags.")) {
+ String tagName = targ.substring("tags.".length());
+ String tagVal = chunk.getTag(tagName);
+ if(tagVal == null)
+ return false;
+ return p.matcher(tagVal).matches();
+ } else {
+ assert false: "unknown target: " +targ;
+ return false;
+ }
+ }
+
+ public String toString() {
+ return targ + "=" +p.toString();
+ }
+ }
+
+ List<SearchRule> compiledPatterns;
+
+ public Filter(String listOfPatterns) throws PatternSyntaxException{
+ compiledPatterns = new ArrayList<SearchRule>();
+ //FIXME: could escape these
+ String[] patterns = listOfPatterns.split(SEPARATOR);
+ for(String p: patterns) {
+ int equalsPos = p.indexOf('=');
+
+ if(equalsPos < 0 || equalsPos > (p.length() -2)) {
+ throw new PatternSyntaxException(
+ "pattern must be of form targ=pattern", p, -1);
+ }
+
+ String targ = p.substring(0, equalsPos);
+ if(!targ.startsWith("tags.") && !ArrayUtils.contains(SEARCH_TARGS, targ)) {
+ throw new PatternSyntaxException(
+ "pattern doesn't start with recognized search target", p, -1);
+ }
+
+ Pattern pat = Pattern.compile(p.substring(equalsPos+1), Pattern.DOTALL);
+ compiledPatterns.add(new SearchRule(pat, targ));
+ }
+ }
+
+ public boolean matches(Chunk chunk) {
+ for(SearchRule r: compiledPatterns) {
+ if(!r.matches(chunk))
+ return false;
+ }
+ return true;
+ }
+
+ public int size() {
+ return compiledPatterns.size();
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(compiledPatterns.get(0));
+ for(int i=1; i < compiledPatterns.size(); ++i) {
+ sb.append(" & ");
+ sb.append(compiledPatterns.get(i));
+ }
+ return sb.toString();
+ }
+}//end class
Modified: hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/ChunkImplTest.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/ChunkImplTest.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/ChunkImplTest.java (original)
+++ hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/ChunkImplTest.java Tue Aug 4 21:10:56 2009
@@ -45,4 +45,15 @@
// right behavior, do nothing
}
}
+
+ /** A tag reads as null until it is added; afterwards getTag returns its unquoted value. */
+ public void testTag() {
+   ChunkBuilder builder = new ChunkBuilder();
+   for (String record : new String[] {"foo", "bar", "baz"})
+     builder.addRecord(record.getBytes());
+   Chunk chunk = builder.getChunk();
+   assertNull(chunk.getTag("foo"));
+   chunk.addTag("foo=\"bar\"");
+   assertEquals("bar", chunk.getTag("foo"));
+ }
}
Modified: hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestDumpChunks.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestDumpChunks.java?rev=800976&r1=800975&r2=800976&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestDumpChunks.java (original)
+++ hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestDumpChunks.java Tue Aug 4 21:10:56 2009
@@ -55,41 +55,7 @@
seqFileWriter.close();
out.close();
}
-
- public void testBasicPatternMatching() {
- try {
- DumpChunks.Filter rules = new DumpChunks.Filter("host=foo.*&cluster=bar&datatype=Data");
- assertEquals(3, rules.size());
- byte[] dat = "someText".getBytes();
- ChunkImpl chunkNone = new ChunkImpl("badData","aname", dat.length, dat, null);
- assertFalse(rules.matches(chunkNone));
- //do the right thing on a non-match
- ChunkImpl chunkSome = new ChunkImpl("badData", "aname", dat.length, dat, null);
- chunkSome.setSource("fooly");
- chunkSome.addTag("cluster=\"bar\"");
- assertFalse(rules.matches( chunkSome));
-
- ChunkImpl chunkAll = new ChunkImpl("Data", "aname", dat.length, dat, null);
- chunkAll.setSource("fooly");
- chunkAll.addTag("cluster=\"bar\"");
-
- System.out.println("chunk is " + chunkAll);
- assertTrue(rules.matches(chunkAll));
-
- //check that we match content correctly
- rules = new DumpChunks.Filter("content=someText");
- assertTrue(rules.matches(chunkAll));
- rules = new DumpChunks.Filter("content=some");
- assertFalse(rules.matches( chunkAll));
- rules = new DumpChunks.Filter("datatype=Data&content=.*some.*");
- assertTrue(rules.matches( chunkAll));
-
- } catch(Exception e) {
- fail("exception " + e);
- }
- }
-
public void testFilePatternMatching() throws IOException, java.net.URISyntaxException {
File tempDir = new File(System.getProperty("test.build.data", "/tmp"));
Added: hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestFilter.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestFilter.java?rev=800976&view=auto
==============================================================================
--- hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestFilter.java (added)
+++ hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/util/TestFilter.java Tue Aug 4 21:10:56 2009
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.chukwa.util;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.chukwa.ChunkImpl;
+
+/** Exercises Filter rules on the host, cluster, datatype and content targets and on custom tags. */
+public class TestFilter extends TestCase {
+
+  /** Builds a chunk of the given data type, stream name "aname" and payload "someText". */
+  private static ChunkImpl newChunk(String dataType) {
+    byte[] payload = "someText".getBytes();
+    return new ChunkImpl(dataType, "aname", payload.length, payload, null);
+  }
+
+  /** Every rule has to match for the filter to pass, and a content rule must match the whole payload. */
+  public void testBasicPatternMatching() {
+    try {
+      Filter filter = new Filter("host=foo.*&cluster=bar&datatype=Data");
+      assertEquals(3, filter.size());
+      // wrong datatype, nothing else set up: rejected
+      assertFalse(filter.matches(newChunk("badData")));
+
+      // host and cluster now agree with the rules, but the datatype still does not
+      ChunkImpl partial = newChunk("badData");
+      partial.setSource("fooly");
+      partial.addTag("cluster=\"bar\"");
+      assertFalse(filter.matches(partial));
+
+      ChunkImpl full = newChunk("Data");
+      full.setSource("fooly");
+      full.addTag("cluster=\"bar\"");
+      assertTrue(filter.matches(full));
+
+      // content rules must cover the entire payload, not just a prefix of it
+      assertTrue(new Filter("content=someText").matches(full));
+      assertFalse(new Filter("content=some").matches(full));
+      assertTrue(new Filter("datatype=Data&content=.*some.*").matches(full));
+    } catch(Exception e) {
+      fail("exception " + e);
+    }
+  }
+
+  /** Despite its name, checks a custom-tag rule: tags.foo=bar fails until the chunk carries foo="bar". */
+  public void testClusterPatterns() {
+    ChunkImpl chunk = newChunk("Data");
+    chunk.setSource("asource");
+    Filter tagRule = new Filter("tags.foo=bar");
+    assertFalse(tagRule.matches(chunk));
+    chunk.addTag("foo=\"bar\"");
+    assertTrue(tagRule.matches(chunk));
+  }
+}