You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@eagle.apache.org by ha...@apache.org on 2016/10/17 03:42:28 UTC
incubator-eagle git commit: [EAGLE-615] Jsoup parses hive sql and returns a String without the line break "\n"
Repository: incubator-eagle
Updated Branches:
refs/heads/master a6bc0a524 -> a710082d4
[EAGLE-615] Jsoup parses hive sql and returns a String without the line break "\n"
EAGLE-615 Jsoup parses hive sql and returns a String without the line break "\n"
- Add "doc.outputSettings().prettyPrint(false);" and get the element value via getWholeText() instead of text()
Author: chitin <ch...@gmail.com>
Closes #499 from chitin/EAGLE615.
Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/a710082d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/a710082d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/a710082d
Branch: refs/heads/master
Commit: a710082d486e10b4732c00a06dd367dc556df60a
Parents: a6bc0a5
Author: chitin <ch...@gmail.com>
Authored: Mon Oct 17 11:42:27 2016 +0800
Committer: Hao Chen <ha...@apache.org>
Committed: Mon Oct 17 11:42:27 2016 +0800
----------------------------------------------------------------------
.../hive/jobrunning/HiveJobFetchSpout.java | 35 +++++++++++++++-----
1 file changed, 26 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/a710082d/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
----------------------------------------------------------------------
diff --git a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
index c0673b3..af4599b 100644
--- a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
+++ b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
@@ -22,6 +22,7 @@ import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
+import org.apache.commons.lang.StringUtils;
import org.apache.eagle.dataproc.impl.storm.ValuesArray;
import org.apache.eagle.jpm.util.*;
import org.apache.eagle.jpm.util.jobrecover.RunningJobManager;
@@ -35,12 +36,14 @@ import org.apache.eagle.security.hive.config.RunningJobCrawlConfig;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.map.ObjectMapper;
import org.jsoup.Jsoup;
+import org.jsoup.nodes.TextNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
+
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.InputStream;
@@ -62,6 +65,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
private Long lastFinishAppTime;
private RunningJobManager runningJobManager;
private int partitionId;
+
static {
OBJ_MAPPER.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
}
@@ -91,22 +95,22 @@ public class HiveJobFetchSpout extends BaseRichSpout {
// sanity verify 0<=partitionId<=numTotalPartitions-1
if (partitionId < 0 || partitionId > crawlConfig.controlConfig.numTotalPartitions) {
throw new IllegalStateException("partitionId should be less than numTotalPartitions with partitionId " +
- partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions);
+ partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions);
}
Class<? extends JobIdPartitioner> partitionerCls = crawlConfig.controlConfig.partitionerCls;
try {
this.jobFilter = new JobIdFilterByPartition(partitionerCls.newInstance(),
- crawlConfig.controlConfig.numTotalPartitions, partitionId);
+ crawlConfig.controlConfig.numTotalPartitions, partitionId);
} catch (Exception e) {
LOG.error("failing instantiating job partitioner class " + partitionerCls.getCanonicalName());
throw new IllegalStateException(e);
}
this.collector = collector;
this.runningJobManager = new RunningJobManager(crawlConfig.zkStateConfig.zkQuorum,
- crawlConfig.zkStateConfig.zkSessionTimeoutMs,
- crawlConfig.zkStateConfig.zkRetryTimes,
- crawlConfig.zkStateConfig.zkRetryInterval,
- crawlConfig.zkStateConfig.zkRoot);
+ crawlConfig.zkStateConfig.zkSessionTimeoutMs,
+ crawlConfig.zkStateConfig.zkRetryTimes,
+ crawlConfig.zkStateConfig.zkRetryInterval,
+ crawlConfig.zkStateConfig.zkRoot);
this.lastFinishAppTime = this.runningJobManager.recoverLastFinishedTime(partitionId);
if (this.lastFinishAppTime == 0l) {
this.lastFinishAppTime = Calendar.getInstance().getTimeInMillis() - 24 * 60 * 60000l;//one day ago
@@ -119,7 +123,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
LOG.info("start to fetch job list");
try {
List<AppInfo> apps = rmResourceFetcher.getResource(Constants.ResourceType.RUNNING_MR_JOB);
- if(apps == null){
+ if (apps == null) {
apps = new ArrayList<>();
}
handleApps(apps, true);
@@ -127,7 +131,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
long fetchTime = Calendar.getInstance().getTimeInMillis();
if (fetchTime - this.lastFinishAppTime > 60000l) {
apps = rmResourceFetcher.getResource(Constants.ResourceType.COMPLETE_MR_JOB, Long.toString(this.lastFinishAppTime));
- if(apps == null){
+ if (apps == null) {
apps = new ArrayList<>();
}
handleApps(apps, false);
@@ -230,6 +234,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
LOG.info("fetch job conf from {}", urlString);
is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
+ doc.outputSettings().prettyPrint(false);
org.jsoup.select.Elements elements = doc.select("table[id=conf]").select("tbody").select("tr");
Map<String, String> hiveQueryLog = new HashMap<>();
Iterator<org.jsoup.nodes.Element> iter = elements.iterator();
@@ -237,7 +242,19 @@ public class HiveJobFetchSpout extends BaseRichSpout {
org.jsoup.nodes.Element element = iter.next();
org.jsoup.select.Elements tds = element.children();
String key = tds.get(0).text();
- String value = tds.get(1).text();
+ String value = "";
+ org.jsoup.nodes.Element valueElement = tds.get(1);
+ if (Constants.HIVE_QUERY_STRING.equals(key)) {
+ for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
+ if (child instanceof TextNode) {
+ TextNode valueTextNode = (TextNode) child;
+ value = valueTextNode.getWholeText();
+ value = StringUtils.strip(value);
+ }
+ }
+ } else {
+ value = valueElement.text();
+ }
hiveQueryLog.put(key, value);
}
if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {