You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@chukwa.apache.org by ey...@apache.org on 2015/06/25 22:48:35 UTC
[04/22] chukwa git commit: CHUKWA-749. Added Chukwa tags to Solr schema for indexing logs. (Eric Yang)
CHUKWA-749. Added Chukwa tags to Solr schema for indexing logs. (Eric Yang)
Project: http://git-wip-us.apache.org/repos/asf/chukwa/repo
Commit: http://git-wip-us.apache.org/repos/asf/chukwa/commit/2d20ab59
Tree: http://git-wip-us.apache.org/repos/asf/chukwa/tree/2d20ab59
Diff: http://git-wip-us.apache.org/repos/asf/chukwa/diff/2d20ab59
Branch: refs/heads/master
Commit: 2d20ab592b0db8abcad09f616e803f085dcb407a
Parents: b2bcd77
Author: Eric Yang <ey...@apache.org>
Authored: Thu Jun 18 18:19:05 2015 -0700
Committer: Eric Yang <ey...@apache.org>
Committed: Thu Jun 18 18:19:05 2015 -0700
----------------------------------------------------------------------
CHANGES.txt | 2 +
contrib/solr/logs/conf/schema.xml | 8 ++-
.../datacollection/writer/solr/SolrWriter.java | 52 ++++++++++++++++----
3 files changed, 51 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/chukwa/blob/2d20ab59/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a8c032d..26b9aa9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -14,6 +14,8 @@ Trunk (unreleased changes)
IMPROVEMENTS
+ CHUKWA-749. Added Chukwa tags to Solr schema for indexing logs. (Eric Yang)
+
CHUKWA-754. Improved graph explorer selection box performance. (Eric Yang)
CHUKWA-745. Improved chart configuration management. (Eric Yang)
http://git-wip-us.apache.org/repos/asf/chukwa/blob/2d20ab59/contrib/solr/logs/conf/schema.xml
----------------------------------------------------------------------
diff --git a/contrib/solr/logs/conf/schema.xml b/contrib/solr/logs/conf/schema.xml
index 2a22977..07daad1 100644
--- a/contrib/solr/logs/conf/schema.xml
+++ b/contrib/solr/logs/conf/schema.xml
@@ -111,12 +111,15 @@
installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
-->
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
- <field name="seqId" type="string" indexed="true" stored="true" omitNorms="true"/>
- <field name="type" type="text_general" indexed="true" stored="true" omitNorms="true"/>
+ <field name="seqId" type="long" indexed="true" stored="true" required="true" multiValued="false" />
+ <field name="type" type="string" indexed="true" stored="true" omitNorms="true"/>
<field name="stream" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="tags" type="text_en_splitting" indexed="true" stored="true" multiValued="true"/>
<field name="source" type="string" indexed="true" stored="true"/>
<field name="data" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
+ <field name="service" type="string" indexed="true" stored="true" multiValued="true" omitNorms="true" />
+ <field name="user" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true" />
+ <field name="date" type="pdate" indexed="true" stored="true" multiValued="false" omitNorms="true" termVectors="true" />
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
@@ -204,6 +207,7 @@
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" />
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://git-wip-us.apache.org/repos/asf/chukwa/blob/2d20ab59/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/SolrWriter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/SolrWriter.java b/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/SolrWriter.java
index 2644eb8..f67fe87 100644
--- a/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/SolrWriter.java
+++ b/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/SolrWriter.java
@@ -17,7 +17,14 @@
*/
package org.apache.hadoop.chukwa.datacollection.writer.solr;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
import java.util.List;
+import java.util.TimeZone;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent;
@@ -33,13 +40,18 @@ import org.apache.solr.common.SolrInputDocument;
public class SolrWriter extends PipelineableWriter {
private static Logger log = Logger.getLogger(SolrWriter.class);
private static CloudSolrServer server;
- private static String ID = "id";
- private static String SEQ_ID = "seqId";
- private static String DATA_TYPE = "type";
- private static String STREAM_NAME = "stream";
- private static String TAGS = "tags";
- private static String SOURCE = "source";
- private static String DATA = "data";
+ private final static String ID = "id";
+ private final static String SEQ_ID = "seqId";
+ private final static String DATA_TYPE = "type";
+ private final static String STREAM_NAME = "stream";
+ private final static String TAGS = "tags";
+ private final static String SOURCE = "source";
+ private final static String DATA = "data";
+ private final static String USER = "user";
+ private final static String SERVICE = "service";
+ private final static String DATE = "date";
+ private final static Pattern userPattern = Pattern.compile("user=(.+?)[, ]");
+ private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
public SolrWriter() throws WriterException {
init(ChukwaAgent.getStaticConfiguration());
@@ -73,11 +85,33 @@ public class SolrWriter extends PipelineableWriter {
doc.addField(SEQ_ID, chunk.getSeqID());
doc.addField(DATA_TYPE, chunk.getDataType());
doc.addField(DATA, new String(chunk.getData()));
+
+ // TODO: improve parsing logic for more sophisticated tagging
+ String data = new String(chunk.getData());
+ Matcher m = userPattern.matcher(data);
+ if(m.find()) {
+ doc.addField(USER, m.group(1));
+ }
+ if(data.contains("hdfs")) {
+ doc.addField(SERVICE, "hdfs");
+ }
+ if(data.contains("yarn")) {
+ doc.addField(SERVICE, "yarn");
+ }
+ if(data.contains("mapredice")) {
+ doc.addField(SERVICE, "mapreduce");
+ }
+ try {
+ Date d = sdf.parse(data);
+ doc.addField(DATE, d, 1.0f);
+ } catch(ParseException e) {
+
+ }
server.add(doc);
server.commit();
} catch (Exception e) {
- log.error(ExceptionUtil.getStackTrace(e));
- throw new WriterException("Failed to store data to Solr Cloud.");
+ log.warn("Failed to store data to Solr Cloud.");
+ log.warn(ExceptionUtil.getStackTrace(e));
}
}
if (next != null) {